1. Introduction

This document takes the QCd, normalized, scaled, batch corrected, clustered, marker identified data, annotates the clusters and integrates ADT data to GEX data

#Install and load required R packages. Install the packages if you do not already have them installed
devtools::install_github("grisslab/scClassifR")
#BiocManager::install("celldex")
BiocManager::install("SCINA", update=FALSE)
install.packages("devtools", repos='http://cran.us.r-project.org')
devtools::install_github("PulakNath/scCATCH")
install.packages("HGNChelper", repos='http://cran.us.r-project.org')
devtools::install_github("rpolicastro/scProportionTest")
library("scProportionTest")
library(HGNChelper)
library(scClassifR)
library(SingleCellExperiment)
library(SingleR)
library(celldex)
#library(SCINA)
library(Seurat)
library(kableExtra)
library(ggplot2)
library(plotly)
library(tidyverse)
library(rmarkdown)
library(scCATCH)

2. Load QC’d, normalized, scaled, batch corrected data and clustered data

# Load QCd, normalized data
load('/data/../TotalSeq/Results/markers/SeuratObjectAfterMarker.RData')
# Output path
outputpath="/data/../TotalSeq/Results/annotation/"
# Set working directory
#setwd(outputpath)

3. Cluster 5 as NK

# Since NK cells are predominantly in cluster 5, we could find markers within this cluster for HC vs BCR-UV
Idents(allsamples) <- "RNA_snn_res.0.25"
prop.table(table(Idents(allsamples)))
##
##            0            1            2            3            4            5
## 0.3756997582 0.1352479380 0.1239524330 0.0682367750 0.0675411574 0.0620424658
##            6            7            8            9           10           11
## 0.0529994369 0.0480307397 0.0273278346 0.0216635198 0.0114611282 0.0049024479
##           12
## 0.0008943655
table(Idents(allsamples))
##
##     0     1     2     3     4     5     6     7     8     9    10    11    12
## 11342  4083  3742  2060  2039  1873  1600  1450   825   654   346   148    27
# Current identity levels
levels(allsamples)
##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12"
head(Idents(allsamples))
## AAACCCACACTGGAAG-NS3R189BTS AAACCCACATGACGTT-NS3R189BTS
##                           2                           1
## AAACCCAGTCCAAATC-NS3R189BTS AAACCCAGTGAGTTTC-NS3R189BTS
##                           0                           1
## AAACCCAGTTCTTAGG-NS3R189BTS AAACCCATCAGCTGAT-NS3R189BTS
##                           0                           1
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12
# Take all cells in cluster 5, and find markers that separate 'HC' and 'BCR-UV' that are in the 'orig.ident' column. subset.ident would the default identity of clusters
markersinnkgroup <- FindMarkers(allsamples, ident.1 = "HC", ident.2 = "BCR-UV", group.by = 'type', subset.ident = 5)

# Save markers
write.table(markersinnkgroup, file="/data/../TotalSeq/Results/annotation/markersinnkgroup.txt", quote=F, row.names = TRUE, sep = "\t")

# Plot top2 up and top2 down regulated markers
VlnPlot(allsamples,idents = c("5"),features=c("FOS","MYOM2","PTGDS","MTRNR2L8"), group.by = "type", pt.size = 0, ncol=2)

cluster5vln = VlnPlot(allsamples,idents = c("5"),features=c("FOS","MYOM2","PTGDS","MTRNR2L8"), group.by = "type", pt.size = 0, ncol=2)

ggsave(paste(outputpath,"violin-with-cluster5nk-markers.pdf",sep=""), plot = cluster5vln, width = 20, height = 15, units = "cm")

FeaturePlot(allsamples, features=c("FOS","MYOM2","PTGDS","MTRNR2L8"), split.by = "type", min.cutoff = "q50", max.cutoff = "q90")

cluster5fp=FeaturePlot(allsamples, features=c("FOS","MYOM2","PTGDS","MTRNR2L8"), split.by = "type", min.cutoff = "q50", max.cutoff = "q90")

ggsave(paste(outputpath,"umap-with-cluster5-markers.pdf",sep=""), plot = cluster5fp, width = 20, height = 15, units = "cm")
#sort -n -k3 markersinnkgroup.txt | head

4. Annotation with sc-type

## SC-TYPE
## Get reference data
source("/data/../TotalSeq/Tools/sctype/gene_sets_prepare.R")
source("/data/../TotalSeq/Tools/sctype/sctype_score_.R")
db_ = "/data/../TotalSeq/Tools/sctype/ScTypeDB_short.xlsx";
# Set tissue as immune system
tissue = "Immune system"
# Prepare marker gene list
gs_list = gene_sets_prepare(db_, tissue)

# get cell-type by cell matrix
es.max = sctype_score(scRNAseqData = allsamples[["RNA"]]@scale.data, scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)

# merge by cluster (column )
cL_resutls = do.call("rbind", lapply(unique(allsamples@meta.data$RNA_snn_res.0.25), function(cl){
    es.max.cl = sort(rowSums(es.max[ ,rownames(allsamples@meta.data[allsamples@meta.data$RNA_snn_res.0.25==cl, ])]), decreasing = !0)
    head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl), 10)
}))

# List top cell types
cL_resutls %>% group_by(cluster) %>% top_n(n = 1)   
## # A tibble: 13 × 3
## # Groups:   cluster [13]
##    cluster type                         scores
##    <fct>   <chr>                         <dbl>
##  1 2       Naive CD4+ T cells             761.
##  2 1       Classical Monocytes           5413.
##  3 0       Naive CD4+ T cells            1472.
##  4 6       Classical Monocytes           6800.
##  5 3       Naive CD8+ T cells            2141.
##  6 5       CD8+ NKT-like cells           6035.
##  7 7       Pre-B cells                   6739.
##  8 9       Memory CD4+ T cells           1182.
##  9 10      Platelets                     2946.
## 10 4       ISG expressing immune cells   1598.
## 11 11      Classical Monocytes            803.
## 12 8       CD8+ NKT-like cells            954.
## 13 12      Plasmacytoid Dendritic cells   266.
# Save sctype annottions
write.table(as.data.frame(cL_resutls %>% group_by(cluster) %>% top_n(n = 1)), file="/data/../TotalSeq/Results/annotation/sctype_annotations.txt", quote=F, row.names = FALSE, sep = "\t")

# Add sctype metadata
allsamples@meta.data$sctype = ""
for(j in unique(cL_resutls$cluster)){
  cl_type = cL_resutls[cL_resutls$cluster==j,]; cl_type = cl_type[order(cl_type$scores, decreasing = T), ]
  if(cl_type$scores[1]>0){
    allsamples@meta.data$sctype[allsamples@meta.data$RNA_snn_res.0.25 == j] = as.character(cl_type$type[1])
  } else {
    allsamples@meta.data$sctype[allsamples@meta.data$RNA_snn_res.0.25 == j] = "Unknown"
  }
}

# create and save umap with labels
DimPlot(allsamples, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'sctype', split.by = "type")        

an0=DimPlot(allsamples, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'sctype', split.by = "type")
an0a=an0+theme(legend.position="none")
an0a

#sctypelabel=DimPlot(allsamples, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'sctype', split.by = "type")
#sctypelabel=sctypelabel+theme(legend.position="none)
ggsave(paste(outputpath,"umap-with-sctype.pdf",sep=""),plot=an0a, width = 50, height = 30, units = "cm")


## Run scsa using the scsa_workflow.R script

5. Manual annotation

## Annotation from scsa, sc-type are combined and manually annotated by Researcher
# Copy cluster column
allsamples[["annotated_RNA_snn_res.0.25"]] = allsamples[["RNA_snn_res.0.25"]]
# Set identity column
Idents(allsamples) <- "annotated_RNA_snn_res.0.25"
# Rename classes.

# Read annotation file
myannotations=read.csv("/data/../TotalSeq/Results/annotation/manualannotation.csv",sep=",")

# Map manual annotation
allsamples$annotated_RNA_snn_res.0.25 <- plyr::mapvalues(
  x = allsamples$RNA_snn_res.0.25,
  from = myannotations$clusternumber,
  to = myannotations$clustername
)

Idents(allsamples) <- "annotated_RNA_snn_res.0.25"
head(allsamples)
##                             orig.ident nCount_RNA nFeature_RNA nCount_ADT
## AAACCCACACTGGAAG-NS3R189BTS NS3R189BTS       2676         1078       1191
## AAACCCACATGACGTT-NS3R189BTS NS3R189BTS       2329         1271       1135
## AAACCCAGTCCAAATC-NS3R189BTS NS3R189BTS       7926         2660       2034
## AAACCCAGTGAGTTTC-NS3R189BTS NS3R189BTS       2339         1291       1058
## AAACCCAGTTCTTAGG-NS3R189BTS NS3R189BTS       3537         1608        936
## AAACCCATCAGCTGAT-NS3R189BTS NS3R189BTS       1758          948       1375
## AAACCCATCATTCACT-NS3R189BTS NS3R189BTS       1478          930        186
## AAACCCATCCGGACGT-NS3R189BTS NS3R189BTS       2045         1003        883
## AAACCCATCCGTGGTG-NS3R189BTS NS3R189BTS       5150         2003       2014
## AAACCCATCCTCTCTT-NS3R189BTS NS3R189BTS       2287         1306        645
##                             nFeature_ADT percent.mito percent.ribo percent.hb
## AAACCCACACTGGAAG-NS3R189BTS           81    0.8221226     42.52616 0.00000000
## AAACCCACATGACGTT-NS3R189BTS           84    4.3795620     18.97810 0.04293688
## AAACCCAGTCCAAATC-NS3R189BTS           81    4.6177139     30.05299 0.02523341
## AAACCCAGTGAGTTTC-NS3R189BTS           79    5.3014109     21.67593 0.00000000
## AAACCCAGTTCTTAGG-NS3R189BTS           78    2.2900763     23.35312 0.00000000
## AAACCCATCAGCTGAT-NS3R189BTS           84    5.1194539     26.67804 0.00000000
## AAACCCATCATTCACT-NS3R189BTS           48    2.6387009     24.56022 0.00000000
## AAACCCATCCGGACGT-NS3R189BTS           78    3.4229829     30.66015 0.00000000
## AAACCCATCCGTGGTG-NS3R189BTS           82    0.6019417     35.28155 0.00000000
## AAACCCATCCTCTCTT-NS3R189BTS           83    2.9296021     19.45780 0.00000000
##                             batch type RNA_snn_res.0.25 RNA_snn_res.0.75
## AAACCCACACTGGAAG-NS3R189BTS     1   HC                2                2
## AAACCCACATGACGTT-NS3R189BTS     1   HC                1                9
## AAACCCAGTCCAAATC-NS3R189BTS     1   HC                0               15
## AAACCCAGTGAGTTTC-NS3R189BTS     1   HC                1                3
## AAACCCAGTTCTTAGG-NS3R189BTS     1   HC                0                4
## AAACCCATCAGCTGAT-NS3R189BTS     1   HC                1                3
## AAACCCATCATTCACT-NS3R189BTS     1   HC                0                0
## AAACCCATCCGGACGT-NS3R189BTS     1   HC                2                2
## AAACCCATCCGTGGTG-NS3R189BTS     1   HC                0                4
## AAACCCATCCTCTCTT-NS3R189BTS     1   HC                0                0
##                             seurat_clusters              sctype
## AAACCCACACTGGAAG-NS3R189BTS               2  Naive CD4+ T cells
## AAACCCACATGACGTT-NS3R189BTS               9 Classical Monocytes
## AAACCCAGTCCAAATC-NS3R189BTS              15  Naive CD4+ T cells
## AAACCCAGTGAGTTTC-NS3R189BTS               3 Classical Monocytes
## AAACCCAGTTCTTAGG-NS3R189BTS               4  Naive CD4+ T cells
## AAACCCATCAGCTGAT-NS3R189BTS               3 Classical Monocytes
## AAACCCATCATTCACT-NS3R189BTS               0  Naive CD4+ T cells
## AAACCCATCCGGACGT-NS3R189BTS               2  Naive CD4+ T cells
## AAACCCATCCGTGGTG-NS3R189BTS               4  Naive CD4+ T cells
## AAACCCATCCTCTCTT-NS3R189BTS               0  Naive CD4+ T cells
##                             annotated_RNA_snn_res.0.25
## AAACCCACACTGGAAG-NS3R189BTS              C2_CD4+ cells
## AAACCCACATGACGTT-NS3R189BTS     C1_Classical Monocytes
## AAACCCAGTCCAAATC-NS3R189BTS            C0_CD4+ T cells
## AAACCCAGTGAGTTTC-NS3R189BTS     C1_Classical Monocytes
## AAACCCAGTTCTTAGG-NS3R189BTS            C0_CD4+ T cells
## AAACCCATCAGCTGAT-NS3R189BTS     C1_Classical Monocytes
## AAACCCATCATTCACT-NS3R189BTS            C0_CD4+ T cells
## AAACCCATCCGGACGT-NS3R189BTS              C2_CD4+ cells
## AAACCCATCCGTGGTG-NS3R189BTS            C0_CD4+ T cells
## AAACCCATCCTCTCTT-NS3R189BTS            C0_CD4+ T cells
# create and save umap with manual annotation labels
an1=DimPlot(allsamples, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'annotated_RNA_snn_res.0.25')
an1a=an1+theme(legend.position="none")
an1a

an2=DimPlot(allsamples, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'annotated_RNA_snn_res.0.25', split.by = "type")
an2a=an2+theme(legend.position="none")
an2a      

#scsalabel=DimPlot(allsamples, reduction = "umap", label = TRUE, repel = TRUE, group.by = 'annotated_RNA_snn_res.0.25', split.by = "type")+theme(legend.position="none)
ggsave(paste(outputpath,"umap-manual-annotation.pdf",sep=""), plot=an2a, width = 50, height = 30, units = "cm")

6. Proportion test

# Sample proportion table
# Proportion and counts of cells in individual samples
# Change identity to 0.25 cluster resolution

# Proportion test
# assign sample group
allsamples$sample=ifelse(grepl("HC",allsamples$type), "HC", ifelse(grepl("UV",allsamples$type), "UV", ""))

Idents(allsamples) <- "RNA_snn_res.0.25"
prop.table(table(Idents(allsamples), allsamples$sample))
##
##                HC           UV
##   0  0.2610884759 0.1146112823
##   1  0.1347510683 0.0004968697
##   2  0.1121600583 0.0117923747
##   3  0.0251747325 0.0430620425
##   4  0.0022856007 0.0652555567
##   5  0.0325284044 0.0295140614
##   6  0.0352777502 0.0177216867
##   7  0.0230216304 0.0250091093
##   8  0.0065255557 0.0208022790
##   9  0.0082811620 0.0133823578
##   10 0.0051343204 0.0063268078
##   11 0.0034449634 0.0014574845
##   12 0.0005631190 0.0003312465
table(Idents(allsamples), allsamples$sample)
##
##        HC   UV
##   0  7882 3460
##   1  4068   15
##   2  3386  356
##   3   760 1300
##   4    69 1970
##   5   982  891
##   6  1065  535
##   7   695  755
##   8   197  628
##   9   250  404
##   10  155  191
##   11  104   44
##   12   17   10
# Save tables
write.table(prop.table(table(Idents(allsamples), allsamples$sample)), file=paste(outputpath,"cluster-proportions.txt",sep=""), quote=F, sep = "\t")
write.table(table(Idents(allsamples), allsamples$sample), file=paste(outputpath,"cluster-counts.txt",sep=""), quote=F, sep = "\t")

# Sample proportion table based on manual annotation
# Proportion and counts of cells in individual samples
# Change identity to manual annotation
Idents(allsamples) <- "annotated_RNA_snn_res.0.25"
prop.table(table(Idents(allsamples), allsamples$sample))
##
##                                              HC           UV
##   C0_CD4+ T cells                  0.2610884759 0.1146112823
##   C1_Classical Monocytes           0.1347510683 0.0004968697
##   C2_CD4+ cells                    0.1121600583 0.0117923747
##   C3_CD8+ T cells                  0.0251747325 0.0430620425
##   C4_CD4+ T cells                  0.0022856007 0.0652555567
##   C5_NK cells                      0.0325284044 0.0295140614
##   C6_Monocytes/Neutrophils         0.0352777502 0.0177216867
##   C7_B cells                       0.0230216304 0.0250091093
##   C8_CD8+ NKT-like cells           0.0065255557 0.0208022790
##   C9_CD4+ T cells                  0.0082811620 0.0133823578
##   C10_Platelets                    0.0051343204 0.0063268078
##   C11_Ionocytes/Monoytes           0.0034449634 0.0014574845
##   C12_Plasmacytoid Dendritic cells 0.0005631190 0.0003312465
table(Idents(allsamples), allsamples$sample)
##
##                                      HC   UV
##   C0_CD4+ T cells                  7882 3460
##   C1_Classical Monocytes           4068   15
##   C2_CD4+ cells                    3386  356
##   C3_CD8+ T cells                   760 1300
##   C4_CD4+ T cells                    69 1970
##   C5_NK cells                       982  891
##   C6_Monocytes/Neutrophils         1065  535
##   C7_B cells                        695  755
##   C8_CD8+ NKT-like cells            197  628
##   C9_CD4+ T cells                   250  404
##   C10_Platelets                     155  191
##   C11_Ionocytes/Monoytes            104   44
##   C12_Plasmacytoid Dendritic cells   17   10
# Save tables
write.table(prop.table(table(Idents(allsamples), allsamples$sample)), file=paste(outputpath,"cluster-proportions-annotated.txt",sep=""), quote=F, sep = "\t")
write.table(table(Idents(allsamples), allsamples$sample), file=paste(outputpath,"cluster-counts-annotated.txt",sep=""), quote=F, sep = "\t")

# Run permutation test
prop_test <- sc_utils(allsamples)
prop_test <- permutation_test(
    prop_test, cluster_identity = "annotated_RNA_snn_res.0.25",
  #prop_test, cluster_identity = "RNA_snn_res.0.25",
    sample_1 = "HC", sample_2 = "UV",
    sample_identity = "sample",
    n_permutations = 10000
)

plot_data=prop_test@results$permutation
plot_data
##                             clusters           HC           UV  obs_log2FD
##  1:                  C0_CD4+ T cells 0.4015282731 0.3276825457 -0.29320275
##  2:                    C10_Platelets 0.0078960774 0.0180888342  1.19589139
##  3:           C11_Ionocytes/Monoytes 0.0052980132 0.0041670613 -0.34642114
##  4: C12_Plasmacytoid Dendritic cells 0.0008660214 0.0009470594  0.12905222
##  5:           C1_Classical Monocytes 0.2072338258 0.0014205891 -7.18862640
##  6:                    C2_CD4+ cells 0.1724910851 0.0337153140 -2.35504586
##  7:                 C3_CD8+ T cells  0.0387162506 0.1231177195  1.66902726
##  8:                  C4_CD4+ T cells 0.0035150280 0.1865706980  5.73004242
##  9:                      C5_NK cells 0.0500254712 0.0843829908  0.75428937
## 10:         C6_Monocytes/Neutrophils 0.0542536933 0.0506676769 -0.09865567
## 11:                       C7_B cells 0.0354049924 0.0715029832  1.01405063
## 12:           C8_CD8+ NKT-like cells 0.0100356597 0.0594753291  2.56715589
## 13:                  C9_CD4+ T cells 0.0127356088 0.0382611990  1.58701416
##           pval         FDR boot_mean_log2FD boot_CI_2.5 boot_CI_97.5
##  1: 0.00009999 0.000129987      -0.29344781  -0.3393655  -0.24792236
##  2: 0.00009999 0.000129987       1.19747035   0.8945870   1.50263308
##  3: 0.10358964 0.112222111      -0.35793167  -0.8949933   0.14677399
##  4: 0.48375162 0.483751625       0.09541642  -1.2053472   1.24251027
##  5: 0.00009999 0.000129987      -7.24325898  -8.1106955  -6.57195504
##  6: 0.00009999 0.000129987      -2.35658219  -2.5100541  -2.20791338
##  7: 0.00009999 0.000129987       1.66903599   1.5456714   1.79216590
##  8: 0.00009999 0.000129987       5.73947814   5.4136458   6.10258624
##  9: 0.00009999 0.000129987       0.75401531   0.6273871   0.88035792
## 10: 0.09659034 0.112222111      -0.09880554  -0.2447594   0.04771904
## 11: 0.00009999 0.000129987       1.01482993   0.8681883   1.16112995
## 12: 0.00009999 0.000129987       2.56783179   2.3455669   2.80419351
## 13: 0.00009999 0.000129987       1.58934252   1.3664078   1.82194815
FDR_threshold = 0.05
log2FD_threshold = log2(1.5)
order_clusters = TRUE

plot_data[, significance := ifelse(
  FDR < FDR_threshold & abs(obs_log2FD) > log2FD_threshold,
  paste("FDR <", FDR_threshold, "& abs(Log2FD) >", round(log2FD_threshold, 2)),
  "n.s."
)]

plot_data[, significance := factor(significance, levels = c(
  paste("FDR <", FDR_threshold, "& abs(Log2FD) >", round(log2FD_threshold, 2)),
  "n.s."
))]

#library(tidyverse)
## Order the clusters by observed log2FD if requested.
if (order_clusters) {
  plot_data[, clusters := fct_reorder(factor(clusters), dplyr::desc(obs_log2FD))]
}

## Plot the results.
p <- ggplot(plot_data, aes(x = clusters, y = obs_log2FD)) +
  geom_pointrange(aes(ymin = boot_CI_2.5, ymax = boot_CI_97.5, color = significance)) +
  theme_bw() +
  geom_hline(yintercept = log2FD_threshold, lty = 2) +
  geom_hline(yintercept = -log2FD_threshold, lty = 2) +
  geom_hline(yintercept = 0) +
  scale_color_manual(values = c("salmon", "grey")) +
  coord_flip()

p

ggsave(paste(outputpath,"cluster_proportion_test.pdf",sep=""), plot=p)


#archive code below
#permutation_plot(prop_test)
#png("proportion.png")
#permutation_plot(prop_test)
#dev.off()

7. ADT with GEX

# Visualize ADT with GEX data
# Specify key for rna features and ADT features
# Add rna_ or adt_ to any gene to visualize
rownames(allsamples[["ADT"]])
##  [1] "CD107a-LAMP-1"                "CD115-CSF-1R"
##  [3] "CD119-IFN-g-R-a-chain"        "CD11b"
##  [5] "CD11c"                        "CD123"
##  [7] "CD127-IL-7Ra"                 "CD135-Flt-3-Flk-2"
##  [9] "CD137-4-1BB"                  "CD137L-4-1BB-Ligand"
## [11] "CD14.1"                       "CD141-Thrombomodulin"
## [13] "CD152-CTLA-4"                 "CD154"
## [15] "CD158e1-KIR3DL1-NKB1"         "CD16"
## [17] "CD163.1"                      "CD172a-SIRPa"
## [19] "CD178-Fas-L"                  "CD183-CXCR3"
## [21] "CD185-CXCR5"                  "CD19.1"
## [23] "CD196-CCR6"                   "CD197-CCR7"
## [25] "CD1c"                         "CD20"
## [27] "CD218a-IL-18Ra"               "CD223-LAG-3"
## [29] "CD226.1"                      "CD24.1"
## [31] "CD244-2B4"                    "CD25"
## [33] "CD27.1"                       "CD273-B7-DC-PD-L2"
## [35] "CD274-B7-H1-PD-L1"            "CD275"
## [37] "CD278-ICOS"                   "CD279-PD-1"
## [39] "CD28.1"                       "CD3"
## [41] "CD303-BDCA-2"                 "CD307c-FcRL3"
## [43] "CD307d-FcRL4"                 "CD307e-FcRL5"
## [45] "CD314-NKG2D"                  "CD335-NKp46"
## [47] "CD336"                        "CD337-NKp30"
## [49] "CD34.1"                       "CD357-GITR"
## [51] "CD366-Tim-3"                  "CD38.1"
## [53] "CD39"                         "CD4.1"
## [55] "CD40.1"                       "CD44.1"
## [57] "CD45"                         "CD45RA"
## [59] "CD45RO"                       "CD47.1"
## [61] "CD48.1"                       "CD56-NCAM"
## [63] "CD57-Recombinant"             "CD62L"
## [65] "CD64"                         "CD69.1"
## [67] "CD70.1"                       "CD8"
## [69] "CD80.1"                       "CD86.1"
## [71] "CD8a"                         "CD94"
## [73] "CD95-Fas"                     "HLA-A-B-C"
## [75] "HLA-A2"                       "HLA-DR"
## [77] "HLA-E"                        "IgD"
## [79] "IgM"                          "KLRG1-MAFA"
## [81] "Lymphotoxin-b-Receptor-LT-bR" "NKp80"
## [83] "TCR-g-d"                      "TIGIT-VSTM3"
## [85] "b2-microglobulin"
Key(allsamples[["RNA"]])
##
## "rna_"
Key(allsamples[["ADT"]])
##
## "adt_"
head(as.data.frame(rownames(allsamples)))
##   rownames(allsamples)
## 1          MIR1302-2HG
## 2              FAM138A
## 3                OR4F5
## 4           AL627309.1
## 5           AL627309.3
## 6           AL627309.2
adtfeatures=c("adt_CD45","adt_CD45RA","adt_CD45RO","adt_CD196-CCR6","adt_CD197-CCR7","adt_CD11c")
gexfeatures=c("rna_PTPRC","rna_PTPRC","rna_PTPRC","rna_CCR6","rna_CCR7","rna_ITGAX")

p1 <- FeaturePlot(allsamples, features=adtfeatures, min.cutoff = "q25", max.cutoff = "q90", split.by = "type")
p2 <- FeaturePlot(allsamples, features=gexfeatures, min.cutoff = "q25", max.cutoff = "q90", split.by = "type")
p1 | p2

p3=p1|p2

ggsave(paste(outputpath,"umap-with-adt.pdf",sep=""), plot = p1, width = 20, height = 40, units = "cm")
ggsave(paste(outputpath,"umap-with-gex.pdf",sep=""), plot = p2, width = 20, height = 40, units = "cm")


save.image(file=paste(outputpath,"SeuratObjectAfterAnnotation.RData",sep=""))
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/intel/compilers_and_libraries_2020.2.254/linux/mkl/lib/intel64_lin/libmkl_rt.so
##
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods
## [8] base
##
## other attached packages:
##  [1] scCATCH_2.1                 rmarkdown_2.18
##  [3] forcats_0.5.2               stringr_1.4.1
##  [5] dplyr_1.0.10                purrr_0.3.5
##  [7] readr_2.1.3                 tidyr_1.2.1
##  [9] tibble_3.1.8                tidyverse_1.3.2
## [11] plotly_4.10.1               ggplot2_3.4.0
## [13] kableExtra_1.3.4            celldex_1.8.0
## [15] SingleR_2.0.0               scClassifR_1.1.2
## [17] SingleCellExperiment_1.20.0 SummarizedExperiment_1.28.0
## [19] Biobase_2.58.0              GenomicRanges_1.50.1
## [21] GenomeInfoDb_1.34.2         IRanges_2.32.0
## [23] S4Vectors_0.36.0            BiocGenerics_0.44.0
## [25] MatrixGenerics_1.10.0       matrixStats_0.62.0
## [27] SeuratObject_4.1.3          Seurat_4.2.1
## [29] HGNChelper_0.8.1            scProportionTest_0.0.0.9000
##
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                scattermore_0.8
##   [3] ModelMetrics_1.2.2.2          ragg_1.2.4
##   [5] bit64_4.0.5                   knitr_1.40
##   [7] irlba_2.3.5.1                 DelayedArray_0.24.0
##   [9] data.table_1.14.4             rpart_4.1.19
##  [11] KEGGREST_1.38.0               hardhat_1.2.0
##  [13] RCurl_1.98-1.9                generics_0.1.3
##  [15] ScaledMatrix_1.6.0            callr_3.7.3
##  [17] cowplot_1.1.1                 usethis_2.1.6
##  [19] RSQLite_2.2.18                RANN_2.6.1
##  [21] proxy_0.4-27                  future_1.29.0
##  [23] tzdb_0.3.0                    bit_4.0.4
##  [25] spatstat.data_3.0-0           webshot_0.5.4
##  [27] xml2_1.3.3                    lubridate_1.9.0
##  [29] httpuv_1.6.6                  assertthat_0.2.1
##  [31] gargle_1.2.1                  gower_1.0.0
##  [33] xfun_0.34                     hms_1.1.2
##  [35] jquerylib_0.1.4               data.tree_1.0.0
##  [37] evaluate_0.18                 promises_1.2.0.1
##  [39] progress_1.2.2                fansi_1.0.3
##  [41] readxl_1.4.1                  dbplyr_2.2.1
##  [43] igraph_1.3.5                  DBI_1.1.3
##  [45] htmlwidgets_1.5.4             spatstat.geom_3.0-3
##  [47] googledrive_2.0.0             ellipsis_0.3.2
##  [49] backports_1.4.1               deldir_1.0-6
##  [51] sparseMatrixStats_1.10.0      vctrs_0.5.0
##  [53] remotes_2.4.2                 ROCR_1.0-11
##  [55] abind_1.4-5                   caret_6.0-93
##  [57] cachem_1.0.6                  withr_2.5.0
##  [59] progressr_0.11.0              sctransform_0.3.5
##  [61] prettyunits_1.1.1             goftest_1.2-3
##  [63] svglite_2.1.0                 cluster_2.1.4
##  [65] ExperimentHub_2.6.0           ape_5.6-2
##  [67] lazyeval_0.2.2                crayon_1.5.2
##  [69] spatstat.explore_3.0-5        labeling_0.4.2
##  [71] recipes_1.0.3                 pkgconfig_2.0.3
##  [73] vipor_0.4.5                   nlme_3.1-160
##  [75] pkgload_1.3.1                 nnet_7.3-18
##  [77] devtools_2.4.5                rlang_1.0.6
##  [79] globals_0.16.1                lifecycle_1.0.3
##  [81] miniUI_0.1.1.1                filelock_1.0.2
##  [83] BiocFileCache_2.6.0           modelr_0.1.9
##  [85] rsvd_1.0.5                    AnnotationHub_3.6.0
##  [87] ggrastr_1.0.1                 cellranger_1.1.0
##  [89] polyclip_1.10-4               lmtest_0.9-40
##  [91] Matrix_1.5-3                  zoo_1.8-11
##  [93] beeswarm_0.4.0                reprex_2.0.2
##  [95] googlesheets4_1.0.1           ggridges_0.5.4
##  [97] processx_3.8.0                png_0.1-7
##  [99] viridisLite_0.4.1             bitops_1.0-7
## [101] KernSmooth_2.23-20            pROC_1.18.0
## [103] Biostrings_2.66.0             blob_1.2.3
## [105] DelayedMatrixStats_1.20.0     parallelly_1.32.1
## [107] spatstat.random_3.0-1         beachmat_2.14.0
## [109] scales_1.2.1                  memoise_2.0.1
## [111] magrittr_2.0.3                plyr_1.8.7
## [113] ica_1.0-3                     zlibbioc_1.44.0
## [115] compiler_4.2.2                RColorBrewer_1.1-3
## [117] fitdistrplus_1.1-8            cli_3.4.1
## [119] XVector_0.38.0                urlchecker_1.0.1
## [121] listenv_0.8.0                 patchwork_1.1.2
## [123] pbapply_1.5-0                 ps_1.7.2
## [125] MASS_7.3-58.1                 tidyselect_1.2.0
## [127] stringi_1.7.8                 textshaping_0.3.6
## [129] highr_0.9                     yaml_2.3.6
## [131] BiocSingular_1.14.0           ggrepel_0.9.2
## [133] grid_4.2.2                    sass_0.4.2
## [135] tools_4.2.2                   timechange_0.1.1
## [137] future.apply_1.10.0           parallel_4.2.2
## [139] rstudioapi_0.14               foreach_1.5.2
## [141] gridExtra_2.3                 prodlim_2019.11.13
## [143] farver_2.1.1                  Rtsne_0.16
## [145] digest_0.6.30                 BiocManager_1.30.19
## [147] shiny_1.7.3                   lava_1.7.0
## [149] Rcpp_1.0.9                    broom_1.0.1
## [151] BiocVersion_3.16.0            later_1.3.0
## [153] RcppAnnoy_0.0.20              httr_1.4.4
## [155] AnnotationDbi_1.60.0          kernlab_0.9-31
## [157] colorspace_2.0-3              rvest_1.0.3
## [159] fs_1.5.2                      tensor_1.5
## [161] reticulate_1.26               splines_4.2.2
## [163] uwot_0.1.14                   spatstat.utils_3.0-1
## [165] sp_1.5-1                      sessioninfo_1.2.2
## [167] systemfonts_1.0.4             xtable_1.8-4
## [169] jsonlite_1.8.3                timeDate_4021.106
## [171] ipred_0.9-13                  R6_2.5.1
## [173] profvis_0.3.7                 pillar_1.8.1
## [175] htmltools_0.5.3               mime_0.12
## [177] glue_1.6.2                    fastmap_1.1.0
## [179] BiocParallel_1.32.1           class_7.3-20
## [181] interactiveDisplayBase_1.36.0 codetools_0.2-18
## [183] pkgbuild_1.3.1                utf8_1.2.2
## [185] lattice_0.20-45               bslib_0.4.1
## [187] spatstat.sparse_3.0-0         ggbeeswarm_0.6.0
## [189] curl_4.3.3                    leiden_0.4.3
## [191] zip_2.2.2                     openxlsx_4.2.5.1
## [193] limma_3.54.0                  survival_3.4-0
## [195] munsell_0.5.0                 e1071_1.7-12
## [197] GenomeInfoDbData_1.2.9        iterators_1.0.14
## [199] haven_2.5.1                   reshape2_1.4.4
## [201] gtable_0.3.1

8. Generate R script

# Reference
# https://satijalab.org/seurat/articles/multimodal_vignette.html
# http://www.bioconductor.org/packages/release/bioc/vignettes/scClassifR/inst/doc/classifying-cells.html
# https://github.com/IanevskiAleksandr/sc-type
# https://github.com/bioinfo-ibms-pumc/SCSA
# https://github.com/rpolicastro/scProportionTest
# https://www.biostars.org/p/457243/
file.copy(from = "/data/../TotalSeq/Tools/annotations.html",
          to   = "/data/../TotalSeq/Reports/site/annotations.html")
## [1] FALSE
file.remove("/data/../TotalSeq/Tools/annotation.html")
## [1] TRUE