This document takes the QCd, normalized, scaled, batch corrected, clustered data and performs differential expression analysis.
#Install and load required R packages. Install the packages if you do not already have them installed
#BiocManager::install('multtest')
#install.packages('metap')
library(Seurat)
library(kableExtra)
library(ggplot2)
library(plotly)
library(tidyverse)
library(rmarkdown)
library(cowplot)
# Load QCd, normalized data
load('/data/../TotalSeq/Results/clustering/SeuratObjectAfterClustering.RData')
# Input path
datadirectory <- "/data/../TotalSeq/Results/clustering/"
# Output path
outputpath="/data/../TotalSeq/Results/markers/"
# Set working directory
#setwd(outputpath)
########################### Marker genes between groups
# Current identity levels
levels(allsamples)
## [1] "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
head(Idents(allsamples))
## AAACCCACACTGGAAG-NS3R189BTS AAACCCACATGACGTT-NS3R189BTS
## 2 1
## AAACCCAGTCCAAATC-NS3R189BTS AAACCCAGTGAGTTTC-NS3R189BTS
## 0 1
## AAACCCAGTTCTTAGG-NS3R189BTS AAACCCATCAGCTGAT-NS3R189BTS
## 0 1
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12
# Change identity to HC vs BCR-UV
Idents(allsamples) <- "type"
head(Idents(allsamples))
## AAACCCACACTGGAAG-NS3R189BTS AAACCCACATGACGTT-NS3R189BTS
## HC HC
## AAACCCAGTCCAAATC-NS3R189BTS AAACCCAGTGAGTTTC-NS3R189BTS
## HC HC
## AAACCCAGTTCTTAGG-NS3R189BTS AAACCCATCAGCTGAT-NS3R189BTS
## HC HC
## Levels: HC BCR-UV
levels(allsamples)
## [1] "HC" "BCR-UV"
########################### tables
# Proportion and counts of cells in clusters and groups
prop.table(table(Idents(allsamples)))
##
## HC BCR-UV
## 0.6502368 0.3497632
table(Idents(allsamples))
##
## HC BCR-UV
## 19630 10559
# Find differentially expressed genes for UV vs BCR-UV
markershcbcruv = FindMarkers(allsamples, ident.1="BCR-UV", ident.2="HC")
head(markershcbcruv)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## EPB41 0 -0.8413380 0.545 0.641 0
## SF3B4 0 0.3139616 0.237 0.075 0
## C1orf56 0 -1.2897434 0.197 0.414 0
## S100A9 0 -1.3083059 0.101 0.477 0
## S100A8 0 -1.1964336 0.064 0.367 0
## RPS27 0 0.9883480 1.000 0.998 0
# Set marker rownames
markershcbcruv$gene=rownames(markershcbcruv)
# Save the list of differentially expressed genes as a tab delimited text file
write.table(markershcbcruv, file=paste(outputpath,"markershcbcruv.txt",sep=""), quote=F, row.names = FALSE, sep = "\t")
VlnPlot(allsamples,features=c("HLA-B","TMSB4X"), pt.size = 0)
FeaturePlot(allsamples, features="HLA-B")
table(allsamples$type)
##
## BCR-UV HC
## 10559 19630
# Get all the expression count matrix
#head(GetAssayData(WT_TG_Filt_Scaled, slot="counts"))
# Get expression values for a single gene from all cells
head(FetchData(allsamples, vars="HLA-B"))
## HLA-B
## AAACCCACACTGGAAG-NS3R189BTS 2.136984
## AAACCCACATGACGTT-NS3R189BTS 2.900033
## AAACCCAGTCCAAATC-NS3R189BTS 2.285611
## AAACCCAGTGAGTTTC-NS3R189BTS 3.778567
## AAACCCAGTTCTTAGG-NS3R189BTS 3.275078
## AAACCCATCAGCTGAT-NS3R189BTS 3.382402
########################### tables
# Proportion and counts of cells in clusters and groups
# Change identity to 0.25 cluster resolution
Idents(allsamples) <- "RNA_snn_res.0.25"
prop.table(table(Idents(allsamples)))
##
## 0 1 2 3 4 5
## 0.3756997582 0.1352479380 0.1239524330 0.0682367750 0.0675411574 0.0620424658
## 6 7 8 9 10 11
## 0.0529994369 0.0480307397 0.0273278346 0.0216635198 0.0114611282 0.0049024479
## 12
## 0.0008943655
table(Idents(allsamples), allsamples$type)
##
## BCR-UV HC
## 0 3460 7882
## 1 15 4068
## 2 356 3386
## 3 1300 760
## 4 1970 69
## 5 891 982
## 6 535 1065
## 7 755 695
## 8 628 197
## 9 404 250
## 10 191 155
## 11 44 104
## 12 10 17
########################### Marker genes between clusters
# Current identity levels
levels(allsamples)
## [1] "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
head(Idents(allsamples))
## AAACCCACACTGGAAG-NS3R189BTS AAACCCACATGACGTT-NS3R189BTS
## 2 1
## AAACCCAGTCCAAATC-NS3R189BTS AAACCCAGTGAGTTTC-NS3R189BTS
## 0 1
## AAACCCAGTTCTTAGG-NS3R189BTS AAACCCATCAGCTGAT-NS3R189BTS
## 0 1
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12
# Find differentially expressed genes for cluster 0 vs 1
markerscluster = FindMarkers(allsamples, ident.1=0, ident.2=1)
head(markerscluster)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## S100A9 0 -1.7039381 0.361 0.896 0
## S100A8 0 -1.4970085 0.245 0.741 0
## FCER1G 0 -1.2172374 0.148 0.520 0
## GNLY 0 -0.9698375 0.471 0.843 0
## IGKC 0 -3.9915160 0.170 0.466 0
## IL1B 0 -1.5641778 0.224 0.711 0
write.table(markerscluster, file=paste(outputpath,"markerscluster.txt",sep=""), quote=F, row.names = FALSE, sep = "\t")
VlnPlot(allsamples,features=c("TNFRSF4","TNFRSF25"), pt.size = 0)
FeaturePlot(allsamples, features="TNFRSF25")
table(allsamples$orig.ident)
##
## NS3R189BTS NS7R65BBTS
## 19630 10559
# Get all the expression count matrix
#head(GetAssayData(WT_TG_Filt_Scaled, slot="counts"))
# Get expression values for a single gene from all cells
head(FetchData(allsamples, vars="TNFRSF25"))
## TNFRSF25
## AAACCCACACTGGAAG-NS3R189BTS 0
## AAACCCACATGACGTT-NS3R189BTS 0
## AAACCCAGTCCAAATC-NS3R189BTS 0
## AAACCCAGTGAGTTTC-NS3R189BTS 0
## AAACCCAGTTCTTAGG-NS3R189BTS 0
## AAACCCATCAGCTGAT-NS3R189BTS 0
########################### Find all markers
markersall=FindAllMarkers(allsamples)
head(markersall)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## ANXA1 0 0.7068765 0.968 0.858 0 0 ANXA1
## LMNA 0 0.6774440 0.881 0.725 0 0 LMNA
## NIBAN1 0 0.5952121 0.635 0.398 0 0 NIBAN1
## LDLRAD4 0 0.5930851 0.726 0.553 0 0 LDLRAD4
## IL7R 0 0.5203522 0.874 0.678 0 0 IL7R
## S100A11 0 0.4987982 0.867 0.716 0 0 S100A11
#View(markersall)
# Save all markers as tab delimited file
write.table(markersall, file=paste(outputpath,"markersall.txt",sep=""),quote=F, row.names = FALSE, sep = ",")
# Plot selected markers
VlnPlot(allsamples,features=c("CYP1B1","IL1B"), pt.size = 0)
FeaturePlot(allsamples, features="IL1B")
# Extract the top marker for each cluster
head(markersall)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## ANXA1 0 0.7068765 0.968 0.858 0 0 ANXA1
## LMNA 0 0.6774440 0.881 0.725 0 0 LMNA
## NIBAN1 0 0.5952121 0.635 0.398 0 0 NIBAN1
## LDLRAD4 0 0.5930851 0.726 0.553 0 0 LDLRAD4
## IL7R 0 0.5203522 0.874 0.678 0 0 IL7R
## S100A11 0 0.4987982 0.867 0.716 0 0 S100A11
markersall %>%
group_by(cluster) %>%
slice(1) %>%
pull(gene) -> best.gene.per.cluster
best.gene.per.cluster
## [1] "ANXA1" "IGHA1" "LEF1" "CCL5" "RPS26" "GNLY" "S100A9"
## [8] "CD79A" "KLRB1" "IKZF2" "PPBP" "PLEKHA6" "RHEX"
# Store plot as ggplot list
vlnplot1<-VlnPlot(allsamples,features=best.gene.per.cluster, pt.size = 0, combine = FALSE)
# vlnplot1<-VlnPlot(allsamples,features=best.gene.per.cluster, pt.size = 0, same.y.lims = TRUE, combine = FALSE)
# Set properties for each plot in the list
p1 <- list()
for (i in seq_along(vlnplot1)){
#Change x and y tick label font size.
p1[[i]] = vlnplot1[[i]] +
theme(legend.position = "none",
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.title.y = element_text(size = 6),
axis.title.x = element_blank(),
title = element_text(size=8))
}
# Plot all the plots in the list
plot_grid(plotlist = p1, ncol = 3)
# Save plot to file
vlnplot1=plot_grid(plotlist = p1, ncol = 3)
ggsave(paste(outputpath,"vlnplot-best-per-cluster.pdf",sep=""), plot = vlnplot1, width = 20, height = 15, units = "cm")
# Plot features in umap
fplot1<-FeaturePlot(allsamples,features=best.gene.per.cluster, pt.size = 0, combine = FALSE)
p2 <- list()
for (i in seq_along(fplot1)){
#Change x and y label font size.
p2[[i]] = fplot1[[i]] +
theme(legend.position = "none",
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
title = element_text(size=8))
}
plot_grid(plotlist = p2, ncol = 4)
fplot1=plot_grid(plotlist = p2, ncol = 4)
ggsave(paste(outputpath,"fplot-best-per-cluster.pdf",sep=""), plot = fplot1, width = 20, height = 15, units = "cm")
########################### tables
# Proportion and counts of cells in clusters and groups
# Change identity to 0.25 cluster resolution
Idents(allsamples) <- "RNA_snn_res.0.25"
prop.table(table(Idents(allsamples)))
##
## 0 1 2 3 4 5
## 0.3756997582 0.1352479380 0.1239524330 0.0682367750 0.0675411574 0.0620424658
## 6 7 8 9 10 11
## 0.0529994369 0.0480307397 0.0273278346 0.0216635198 0.0114611282 0.0049024479
## 12
## 0.0008943655
table(Idents(allsamples))
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 11342 4083 3742 2060 2039 1873 1600 1450 825 654 346 148 27
# Current identity levels
levels(allsamples)
## [1] "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
head(Idents(allsamples))
## AAACCCACACTGGAAG-NS3R189BTS AAACCCACATGACGTT-NS3R189BTS
## 2 1
## AAACCCAGTCCAAATC-NS3R189BTS AAACCCAGTGAGTTTC-NS3R189BTS
## 0 1
## AAACCCAGTTCTTAGG-NS3R189BTS AAACCCATCAGCTGAT-NS3R189BTS
## 0 1
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12
# Take all cells in cluster 1, and find markers that separate 'HC' and 'BCR-UV' that are in the 'orig.ident' column. subset.ident would the default identity of clusters
markersingroup <- FindMarkers(allsamples, ident.1 = "HC", ident.2 = "BCR-UV", group.by = 'type', subset.ident = 0)
head(markersingroup)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## S100A11 0 -0.8963638 0.820 0.973 0
## S100A9 0 2.4467055 0.497 0.051 0
## S100A8 0 1.8369587 0.346 0.016 0
## RPS27 0 -1.1221617 0.999 0.999 0
## LMNA 0 -1.3431421 0.833 0.989 0
## RPS7 0 -0.5535502 0.985 0.999 0
# Save markers
write.table(markersingroup, file="/data/../TotalSeq/Results/markers/markersingroup.txt", quote=F, row.names = FALSE, sep = "\t")
VlnPlot(allsamples,features=c("HLA-B","IFITM1"), group.by = "type", pt.size = 0)
FeaturePlot(allsamples, features=c("HLA-B"), split.by = "type")
table(allsamples$type)
##
## BCR-UV HC
## 10559 19630
# Get all the expression count matrix
#head(GetAssayData(allsamples, slot="counts"))
# Get expression values for a single gene from all cells
head(FetchData(allsamples, vars="HLA-B"))
## HLA-B
## AAACCCACACTGGAAG-NS3R189BTS 2.136984
## AAACCCACATGACGTT-NS3R189BTS 2.900033
## AAACCCAGTCCAAATC-NS3R189BTS 2.285611
## AAACCCAGTGAGTTTC-NS3R189BTS 3.778567
## AAACCCAGTTCTTAGG-NS3R189BTS 3.275078
## AAACCCATCAGCTGAT-NS3R189BTS 3.382402
save.image(file=paste(outputpath,"SeuratObjectAfterMarker.RData",sep=""))
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/intel/compilers_and_libraries_2020.2.254/linux/mkl/lib/intel64_lin/libmkl_rt.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] cowplot_1.1.1 rmarkdown_2.18 forcats_0.5.2
## [4] stringr_1.4.1 dplyr_1.0.10 purrr_0.3.5
## [7] readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [10] tidyverse_1.3.2 kableExtra_1.3.4 KernSmooth_2.23-20
## [13] fields_14.1 viridis_0.6.2 viridisLite_0.4.1
## [16] spam_2.9-1 plotly_4.10.1 ggplot2_3.4.0
## [19] SeuratObject_4.1.3 Seurat_4.2.1 DoubletFinder_2.0.3
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 spatstat.explore_3.0-5 reticulate_1.26
## [4] tidyselect_1.2.0 htmlwidgets_1.5.4 grid_4.2.2
## [7] Rtsne_0.16 munsell_0.5.0 codetools_0.2-18
## [10] ragg_1.2.4 ica_1.0-3 future_1.29.0
## [13] miniUI_0.1.1.1 withr_2.5.0 spatstat.random_3.0-1
## [16] colorspace_2.0-3 progressr_0.11.0 highr_0.9
## [19] knitr_1.40 rstudioapi_0.14 ROCR_1.0-11
## [22] tensor_1.5 listenv_0.8.0 labeling_0.4.2
## [25] polyclip_1.10-4 bit64_4.0.5 farver_2.1.1
## [28] parallelly_1.32.1 vctrs_0.5.0 generics_0.1.3
## [31] xfun_0.34 timechange_0.1.1 R6_2.5.1
## [34] ggbeeswarm_0.6.0 hdf5r_1.3.7 spatstat.utils_3.0-1
## [37] cachem_1.0.6 assertthat_0.2.1 promises_1.2.0.1
## [40] scales_1.2.1 googlesheets4_1.0.1 beeswarm_0.4.0
## [43] gtable_0.3.1 globals_0.16.1 goftest_1.2-3
## [46] rlang_1.0.6 systemfonts_1.0.4 splines_4.2.2
## [49] lazyeval_0.2.2 gargle_1.2.1 spatstat.geom_3.0-3
## [52] broom_1.0.1 yaml_2.3.6 reshape2_1.4.4
## [55] abind_1.4-5 modelr_0.1.9 backports_1.4.1
## [58] httpuv_1.6.6 tools_4.2.2 ellipsis_0.3.2
## [61] jquerylib_0.1.4 RColorBrewer_1.1-3 ggridges_0.5.4
## [64] Rcpp_1.0.9 plyr_1.8.7 deldir_1.0-6
## [67] pbapply_1.5-0 zoo_1.8-11 haven_2.5.1
## [70] ggrepel_0.9.2 cluster_2.1.4 fs_1.5.2
## [73] magrittr_2.0.3 data.table_1.14.4 scattermore_0.8
## [76] reprex_2.0.2 lmtest_0.9-40 RANN_2.6.1
## [79] googledrive_2.0.0 fitdistrplus_1.1-8 matrixStats_0.62.0
## [82] hms_1.1.2 patchwork_1.1.2 mime_0.12
## [85] evaluate_0.18 xtable_1.8-4 readxl_1.4.1
## [88] gridExtra_2.3 compiler_4.2.2 maps_3.4.1
## [91] crayon_1.5.2 htmltools_0.5.3 later_1.3.0
## [94] tzdb_0.3.0 lubridate_1.9.0 DBI_1.1.3
## [97] dbplyr_2.2.1 MASS_7.3-58.1 Matrix_1.5-3
## [100] cli_3.4.1 parallel_4.2.2 dotCall64_1.0-2
## [103] igraph_1.3.5 pkgconfig_2.0.3 sp_1.5-1
## [106] spatstat.sparse_3.0-0 xml2_1.3.3 svglite_2.1.0
## [109] vipor_0.4.5 bslib_0.4.1 webshot_0.5.4
## [112] rvest_1.0.3 digest_0.6.30 sctransform_0.3.5
## [115] RcppAnnoy_0.0.20 spatstat.data_3.0-0 cellranger_1.1.0
## [118] leiden_0.4.3 uwot_0.1.14 curl_4.3.3
## [121] shiny_1.7.3 lifecycle_1.0.3 nlme_3.1-160
## [124] jsonlite_1.8.3 limma_3.54.0 fansi_1.0.3
## [127] pillar_1.8.1 lattice_0.20-45 ggrastr_1.0.1
## [130] fastmap_1.1.0 httr_1.4.4 survival_3.4-0
## [133] glue_1.6.2 remotes_2.4.2 png_0.1-7
## [136] bit_4.0.4 stringi_1.7.8 sass_0.4.2
## [139] textshaping_0.3.6 irlba_2.3.5.1 future.apply_1.10.0
# Reference
# https://satijalab.org/seurat/articles/multimodal_vignette.html
#file.copy(from = "/data/../TotalSeq/Tools/clustering.html",
# to = "/data/../TotalSeq/Reports/site/clustering.html")
#file.remove("/data/../TotalSeq/Tools/clustering.html")