Skip to the content.

CNR Bioinformatics Workshop

Back to curriculum

4. Visualization

4.1 Load libraries

library(ggplot2)
library(Seurat)
library(dplyr)

4.2 Load data

# Read the DEX table from disk, then preview its first six rows.
DEX <- read.csv(file = "DEX.csv")
head(x = DEX, n = 6)
A data.frame: 6 × 8
   X          p_val          avg_log2FC  pct.1  pct.2  p_val_adj      cluster  gene
   <chr>      <dbl>          <dbl>       <dbl>  <dbl>  <dbl>          <int>    <chr>
1  THSD7A     6.045978e-196  1.869736    0.936  0.344  9.984329e-192  0        THSD7A
2  CDH7       8.096795e-188  1.887889    0.677  0.088  1.337105e-183  0        CDH7
3  CADM1      9.910354e-165  1.451553    0.961  0.594  1.636596e-160  0        CADM1
4  NTS        1.028108e-156  2.381170    0.753  0.192  1.697817e-152  0        NTS
5  LINC00643  6.132768e-156  1.219894    0.821  0.219  1.012765e-151  0        LINC00643
6  CNTNAP2    1.002729e-146  1.338118    0.975  0.578  1.655906e-142  0        CNTNAP2
# Restore the saved object that is passed to the Seurat plotting functions.
df0 <- readRDS(file = "df0.rds")

4.3 Feature plot

# Keep the gene names from the first six rows of the DEX table.
genes <- head(DEX$gene, n = 6)

Make Feature plots with Seurat function FeaturePlot

# Set the notebook figure size (width x height) before drawing.
options(repr.plot.height = 9, repr.plot.width = 6)
# Draw a feature plot for each gene in `genes`.
FeaturePlot(object = df0, features = genes)

png

Use the NoLegend function to remove the legend

options(repr.plot.width = 6, repr.plot.height = 9)
# With several features FeaturePlot() returns one combined (patchwork) figure.
# `&` applies NoLegend() to every panel of that figure, whereas `+` would only
# modify the last panel.
FeaturePlot(df0, features = genes) & NoLegend()

png

Use combine = FALSE to create a list of plots that you can modify individually

# combine = FALSE returns the individual plots as a list instead of a single
# combined figure. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
plist <- FeaturePlot(df0, features = genes, combine = FALSE)
typeof(plist)

‘list’

Use lapply to change each plot in the list one at a time

# Strip the legend from every plot; lapply returns the modified list.
plist <- lapply(plist, function(p) p + NoLegend())

Use do.call and gridExtra to present figures

# Set the notebook figure size (width x height) before drawing.
options(repr.plot.height = 9, repr.plot.width = 6)
# Pass every plot in the list, plus ncol = 2, as arguments to grid.arrange.
do.call(
    what = gridExtra::grid.arrange,
    args = c(plist, list(ncol = 2))
)

png

Remove axes and change color scale

# Give every plot a grey-to-YlOrRd colour gradient and apply theme_void().
plist <- lapply(plist, function(p) {
    heat_colours <- c("grey", RColorBrewer::brewer.pal(9, "YlOrRd"))
    p + scale_color_gradientn(colours = heat_colours) + theme_void()
})

4.4 Volcano plot

# Smallest and largest avg_log2FC in the DEX table.
range(DEX[["avg_log2FC"]])

<ol class=list-inline><li>0.250067637009388</li><li>5.11402342406602</li></ol>

# Square 5 x 5 canvas for the volcano plot.
options(repr.plot.height = 5, repr.plot.width = 5)
# avg_log2FC on x, -log10 of the adjusted p-value on y.
ggplot(data = DEX, mapping = aes(x = avg_log2FC, y = -log10(p_val_adj))) +
    geom_point()

png

Use different ggplot2 themes

# Square 5 x 5 canvas for the volcano plot.
options(repr.plot.height = 5, repr.plot.width = 5)
# Same scatter as before, drawn with the classic theme.
ggplot(data = DEX, mapping = aes(x = avg_log2FC, y = -log10(p_val_adj))) +
    geom_point() +
    theme_classic()

png

Use facet wrap to separate different clusters into individual plots

# Larger 9 x 9 canvas so each facet stays readable.
options(repr.plot.height = 9, repr.plot.width = 9)
# One panel per value of `cluster`.
ggplot(data = DEX, mapping = aes(x = avg_log2FC, y = -log10(p_val_adj))) +
    geom_point() +
    facet_wrap(facets = ~cluster) +
    theme_classic()

png

Use dplyr to subset data

# Larger 9 x 9 canvas so each facet stays readable.
options(repr.plot.height = 9, repr.plot.width = 9)
# Start from an empty plot and hand the filtered rows (positive avg_log2FC
# only) directly to the point layer.
ggplot() +
    geom_point(
        data = filter(DEX, avg_log2FC > 0),
        mapping = aes(x = avg_log2FC, y = -log10(p_val_adj))
    ) +
    facet_wrap(~cluster) +
    theme_classic()

png

Use different colors for positive and negative log2 fold changes

# Larger 9 x 9 canvas so each facet stays readable.
options(repr.plot.height = 9, repr.plot.width = 9)
# Two point layers, each fed its own subset of DEX:
# positive avg_log2FC in red, negative avg_log2FC in green.
ggplot() +
    geom_point(
        data = filter(DEX, avg_log2FC > 0),
        mapping = aes(x = avg_log2FC, y = -log10(p_val_adj)),
        color = "red"
    ) +
    geom_point(
        data = filter(DEX, avg_log2FC < 0),
        mapping = aes(x = avg_log2FC, y = -log10(p_val_adj)),
        color = "green"
    ) +
    facet_wrap(~cluster) +
    theme_classic()

png

4.5 Violin plots

# Display the gene names stored in `genes`.
genes

<ol class=list-inline><li>‘THSD7A’</li><li>‘CDH7’</li><li>‘CADM1’</li><li>‘NTS’</li><li>‘LINC00643’</li><li>‘CNTNAP2’</li></ol>

# Pull the "data" slot of the RNA assay for the selected genes through the
# GetAssayData() accessor instead of reaching into the object's slots.
# drop = FALSE keeps a genes x cells matrix even when only one gene is
# requested, so the transpose always yields one row per cell.
data.to.plot <- as.data.frame(t(as.matrix(
    GetAssayData(df0, assay = "RNA", slot = "data")[genes, , drop = FALSE]
)))
# Attach each cell's cluster assignment.
data.to.plot$cluster <- df0$seurat_clusters
head(data.to.plot)
A data.frame: 6 × 7
                    THSD7A     CDH7      CADM1     NTS        LINC00643  CNTNAP2   cluster
                    <dbl>      <dbl>     <dbl>     <dbl>      <dbl>      <dbl>     <fct>
AAACCCAGTCTCAGGC-1  2.5219109  1.125659  1.416672  3.4730135  0.7135008  2.521911  0
AAACCCAGTGACTATC-1  1.7966931  0.000000  2.490332  0.6961103  1.3907357  1.390736  4
AAACCCATCCCATTTA-1  0.0000000  0.000000  0.000000  1.2377887  0.0000000  1.774268  4
AAACCCATCTGAGAAA-1  0.6287137  0.000000  1.011744  0.0000000  0.0000000  1.011744  4
AAACGAACAATCTGCA-1  0.0000000  0.000000  0.000000  0.0000000  0.0000000  0.000000  3
AAACGCTCAAATGGAT-1  1.1773758  0.000000  1.384908  4.1090559  1.8310291  1.703236  4
# Reshape to long format: one row per (cell, gene) pair, keeping `cluster`
# as the identifier column.
data.to.plot2 <- reshape2::melt(data = data.to.plot, id.vars = "cluster")
head(x = data.to.plot2, n = 6)
A data.frame: 6 × 3
   cluster  variable  value
   <fct>    <fct>     <dbl>
1  0        THSD7A    2.5219109
2  4        THSD7A    1.7966931
3  4        THSD7A    0.0000000
4  4        THSD7A    0.6287137
5  3        THSD7A    0.0000000
6  4        THSD7A    1.1773758
# Wide, short canvas; one violin per gene (`variable`) along the x axis.
options(repr.plot.height = 3, repr.plot.width = 9)
ggplot(data = data.to.plot2, mapping = aes(x = variable, y = value)) +
    geom_violin()

png

Plot by cluster and use scale = "width"

# Wide, short canvas; one violin per cluster along the x axis.
options(repr.plot.height = 3, repr.plot.width = 9)
# scale = "width" gives every violin the same maximum width.
ggplot(data = data.to.plot2, mapping = aes(x = cluster, y = value)) +
    geom_violin(scale = "width") +
    theme_classic()

png

Use scale = "width" (the same plot again)

# Wide, short canvas; one violin per cluster along the x axis.
options(repr.plot.height = 3, repr.plot.width = 9)
# scale = "width" gives every violin the same maximum width.
ggplot(data = data.to.plot2, mapping = aes(x = cluster, y = value)) +
    geom_violin(scale = "width") +
    theme_classic()

png

Use fill to add color

# Wide, short canvas; one violin per cluster along the x axis.
options(repr.plot.height = 3, repr.plot.width = 9)
# Mapping `fill` to cluster colours each violin by its cluster.
ggplot(
    data = data.to.plot2,
    mapping = aes(x = cluster, y = value, fill = cluster)
) +
    geom_violin(scale = "width") +
    theme_classic()

png

Use facet wrap to show each gene individually

# Wide, short canvas; one violin per cluster along the x axis.
options(repr.plot.height = 3, repr.plot.width = 9)
# One panel per gene (`variable`), violins coloured by cluster.
ggplot(
    data = data.to.plot2,
    mapping = aes(x = cluster, y = value, fill = cluster)
) +
    geom_violin(scale = "width") +
    facet_wrap(facets = ~variable) +
    theme_classic()

png

Pick the most significant DEX gene from each cluster

# Within each cluster keep the row with the smallest adjusted p-value.
# top_n() keeps the largest values of `wt`, so p_val_adj is negated.
DEX %>%
    group_by(cluster) %>%
    top_n(n = 1, wt = -p_val_adj)
A grouped_df: 8 × 8
X        p_val          avg_log2FC  pct.1  pct.2  p_val_adj      cluster  gene
<chr>    <dbl>          <dbl>       <dbl>  <dbl>  <dbl>          <int>    <chr>
THSD7A   6.045978e-196   1.869736   0.936  0.344  9.984329e-192  0        THSD7A
CSRP2    4.887031e-134   2.070127   0.994  0.814  8.070443e-130  1        CSRP2
SOX2.2   1.213735e-136   1.551859   0.892  0.186  2.004362e-132  2        SOX2
CKB.2    9.850717e-67   -1.437922   0.861  0.982  1.626747e-62   3        CKB
MEF2C.3  6.308674e-79    1.580887   1.000  0.508  1.041814e-74   4        MEF2C
MKI67    1.433349e-265   2.652864   0.690  0.011  2.367033e-261  5        MKI67
EDNRB.1  2.275104e-292   2.798285   0.839  0.019  3.757107e-288  6        EDNRB
EOMES.1  1.183226e-181   2.753743   0.899  0.037  1.953980e-177  7        EOMES
# Same selection, reduced to the gene column. The data are still grouped, so
# select() keeps the grouping column `cluster` as well.
DEX %>%
    group_by(cluster) %>%
    top_n(n = 1, wt = -p_val_adj) %>%
    select(gene)
Adding missing grouping variables: `cluster`
A grouped_df: 8 × 2
cluster  gene
<int>    <chr>
0        THSD7A
1        CSRP2
2        SOX2
3        CKB
4        MEF2C
5        MKI67
6        EDNRB
7        EOMES
# Store the most significant gene of each cluster. ungroup() drops the
# grouping before select(), so the result holds only the `gene` column and
# dplyr no longer re-adds `cluster` with an "Adding missing grouping
# variables" message.
gene <- DEX %>%
    group_by(cluster) %>%
    top_n(n = 1, wt = -p_val_adj) %>%
    ungroup() %>%
    select(gene)
# Pull the "data" slot of the RNA assay for the per-cluster top genes through
# the GetAssayData() accessor instead of reaching into the object's slots.
# drop = FALSE keeps a genes x cells matrix even when only one gene is
# requested, so the transpose always yields one row per cell.
data.to.plot <- as.data.frame(t(as.matrix(
    GetAssayData(df0, assay = "RNA", slot = "data")[gene$gene, , drop = FALSE]
)))
# Attach each cell's cluster assignment, then reshape to long format.
data.to.plot$cluster <- df0$seurat_clusters
data.to.plot2 <- reshape2::melt(data.to.plot, id.vars = c("cluster"))
# Narrow, tall canvas: the genes are stacked in a single column of panels.
options(repr.plot.width = 3, repr.plot.height = 9)
ggplot(data.to.plot2, aes(x = cluster, y = value, fill = cluster)) +
    geom_violin(scale = "width") +
    facet_wrap(~variable, ncol = 1) +
    theme_classic()

png

4.6 Install and run Cerebro

# Install remotes only when it is missing, so re-running this cell does not
# reinstall it every time.
if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
}
package 'remotes' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\ZLI2\AppData\Local\Temp\3\Rtmp6hrUI3\downloaded_packages
# BiocManager must itself be installed before it can be called.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}
# Install cerebroApp from its GitHub repository.
BiocManager::install("romanhaa/cerebroApp")
'getOption("repos")' replaces Bioconductor standard repositories, see
'?repositories' for details

replacement repositories:
    CRAN: https://cran.r-project.org


Bioconductor version 3.14 (BiocManager 1.30.18), R 4.1.3 (2022-03-10)

Installing github package(s) 'romanhaa/cerebroApp'

Downloading GitHub repo romanhaa/cerebroApp@HEAD



Rcpp  (1.0.8.3 -> 1.0.9) [CRAN]
later (1.2.0   -> 1.3.0) [CRAN]


Installing 2 packages: Rcpp, later




  There is a binary version available but the source version is later:
      binary source needs_compilation
Rcpp 1.0.8.3  1.0.9              TRUE

  Binaries will be installed
package 'Rcpp' successfully unpacked and MD5 sums checked


Warning message:
"cannot remove prior installation of package 'Rcpp'"
Warning message in file.copy(savedcopy, lib, recursive = TRUE):
"problem copying C:\Users\ZLI2\Miniconda3\envs\myEnv\lib\R\library\00LOCK\Rcpp\libs\x64\Rcpp.dll to C:\Users\ZLI2\Miniconda3\envs\myEnv\lib\R\library\Rcpp\libs\x64\Rcpp.dll: Permission denied"
Warning message:
"restored 'Rcpp'"


package 'later' successfully unpacked and MD5 sums checked


Warning message:
"cannot remove prior installation of package 'later'"
Warning message in file.copy(savedcopy, lib, recursive = TRUE):
"problem copying C:\Users\ZLI2\Miniconda3\envs\myEnv\lib\R\library\00LOCK\later\libs\x64\later.dll to C:\Users\ZLI2\Miniconda3\envs\myEnv\lib\R\library\later\libs\x64\later.dll: Permission denied"
Warning message:
"restored 'later'"



The downloaded binary packages are in
	C:\Users\ZLI2\AppData\Local\Temp\3\Rtmp6hrUI3\downloaded_packages


Running `R CMD build`...



* checking for file 'C:\Users\ZLI2\AppData\Local\Temp\3\Rtmp6hrUI3\remotes944065c13363\romanhaa-cerebroApp-0de48b6/DESCRIPTION' ... OK
* preparing 'cerebroApp':
* checking DESCRIPTION meta-information ... OK
* checking for LF line-endings in source and make files and shell scripts
* checking for empty or unneeded directories
Omitted 'LazyData' from DESCRIPTION
* building 'cerebroApp_1.3.1.tar.gz'


Old packages: 'ica', 'later', 'Rcpp'
# Export the Seurat object to a Cerebro file (df0.crb), naming the metadata
# columns that hold the cluster grouping, UMI counts and gene counts.
cerebroApp::exportFromSeurat(
    object = df0,
    assay = "RNA",
    groups = "seurat_clusters",
    nUMI = "nCount_RNA",
    nGene = "nFeature_RNA",
    experiment_name = "workshop",
    organism = "Human",
    file = "df0.crb"
)
[15:52:47] Start collecting data...

[15:52:47] Overview of Cerebro object:


class: Cerebro_v1.3
cerebroApp version: 1.3.1
experiment name: workshop
organism: Human
date of analysis: 
date of export: 2022-07-10
number of cells: 2,125
number of genes: 16,514
grouping variables (1): seurat_clusters
cell cycle variables (0): 
projections (1): umap
trees (0): 
most expressed genes: 
marker genes:
enriched pathways:
trajectories:
extra material:


[15:52:47] Saving Cerebro object to: df0.crb

[15:52:52] Done!
# Launch the Cerebro app with its default arguments.
cerebroApp::launchCerebro()
##---------------------------------------------------------------------------##
## Launching Cerebro v1.3
##---------------------------------------------------------------------------##

Loading required package: shiny


Listening on http://127.0.0.1:6565

class: Cerebro_v1.3
cerebroApp version: 1.3.0
experiment name: pbmc_10k_v3
organism: hg
date of analysis: 2020-09-21
date of export: 2020-09-21
number of cells: 501
number of genes: 1,000
grouping variables (3): sample, seurat_clusters, cell_type_singler_blueprintencode_main
cell cycle variables (1): Phase
projections (4): tSNE, tSNE_3D, UMAP, UMAP_3D
trees (3): sample, seurat_clusters, cell_type_singler_blueprintencode_main
most expressed genes: sample, seurat_clusters, cell_type_singler_blueprintencode_main
marker genes:
  - cerebro_seurat (3): sample, seurat_clusters, cell_type_singler_blueprintencode_main
enriched pathways:
  - cerebro_seurat_enrichr (3): sample, seurat_clusters, cell_type_singler_blueprintencode_main, 
  - cerebro_GSVA (3): sample, seurat_clusters, cell_type_singler_blueprintencode_main
trajectories:
  - monocle2 (2): all_cells, subset_of_cells
extra material:
  - tables (1): SingleR_results




class: Cerebro_v1.3
cerebroApp version: 1.3.1
experiment name: workshop
organism: Human
date of analysis: 
date of export: 2022-07-10
number of cells: 2,125
number of genes: 16,514
grouping variables (1): seurat_clusters
cell cycle variables (0): 
projections (1): umap
trees (0): 
most expressed genes: 
marker genes:
enriched pathways:
trajectories:
extra material:




Warning message:
"`autoHideNavigation` only works with DT client mode and it will be ignored"
Warning message:
"The select input "expression_genes_input" contains a large number of options; consider using server-side selectize for massively improved performance. See the Details section of the ?selectizeInput help topic."
Warning message:
"`autoHideNavigation` only works with DT client mode and it will be ignored"
Warning message:
"`autoHideNavigation` only works with DT client mode and it will be ignored"
Warning message:
"`autoHideNavigation` only works with DT client mode and it will be ignored"
Warning message:
"`autoHideNavigation` only works with DT client mode and it will be ignored"

CerebroApp

« Previous