This document takes the cellranger count data for each of the samples, does qc anlaysis, filtering, generates before and after qc plots and combines the data.
# Install hdf5 library if its not already installed
#sudo apt-get install libhdf5-dev or brew install hdf5
#install.packages("hdf5r")
remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
suppressMessages(require(DoubletFinder))
# Load below libraries
library(Seurat)
library(ggplot2)
library(plotly)
# Set the input, output paths
datapath="/data/../TotalSeq/Results/cellranger"
outputpath="/data/../TotalSeq/Results/qualitycontrol/"
#datapath="/data/../TotalSeq/Results/cellranger"
#outputpath="/data/../TotalSeq/Reports/"
sampleinfopath="/data/../TotalSeq/SampleInformation.tab"
setwd(outputpath)
# Read the results folders
file_list <- list.files(path=datapath)
# List result folders
print("Samples in this analysis")
## [1] "Samples in this analysis"
print(file_list)
## [1] "NS3R189BTS" "NS7R65BBTS"
##################################### Extract and map short names
# Read sample information file
sampleinformation=read.csv(sampleinfopath,sep="\t")
#View(sampleinformation)
# Specify samples for analysis. Sample names should correspond to the cellranger output path folder names for the samples
samples=c("NS3R189BTS","NS7R65BBTS")
# Create empty lists to hold the combined data set
allsamplestomerge=list()
allsamplestomergeprefilter=list()
This part of the workflow reads in each of the samples, analyses the quality, performs the quality filter and merges the data. Both merged unfiltered and merged filtered data are then used for plotting the quality
# Using the for loop to reads the samples list
for(i in samples)
{
# Remove any old variables
rm(onesampledataFilt)
rm(onesampledata)
rm(onesample)
# Print the current sample name
print(i)
# generate a clean sample name
filesample=unlist(file_list[i])
# Read the data file
onesample <- Read10X_h5(file.path(datapath,paste0(i,"/"),"cellranger_output/filtered_feature_bc_matrix.h5"), use.names = T)
# Print column names
colnames(onesample$`Gene Expression`)
# Add gene names to columnnames
colnames(onesample$`Gene Expression`) <- paste(sapply(strsplit(colnames(onesample$`Gene Expression`),split="-"),'[[',1L),i,sep="-")
# Print column names
colnames(onesample$`Gene Expression`)
colnames(onesample$`Antibody Capture`) <- paste(sapply(strsplit(colnames(onesample$`Antibody Capture`),split="-"),'[[',1L),i,sep="-")
# Create a Seurat data object from the gex matrix, keeping all the features, and only cells with at least 300 genes
onesampledata <- CreateSeuratObject(counts = onesample[["Gene Expression"]], min.cells = 0,min.features = 300,names.field = 2,names.delim = "\\-")
# Add the ADT assay to the seurate gex object created above
onesampledata[["ADT"]] <- CreateAssayObject(onesample[["Antibody Capture"]][, colnames(x = onesampledata)])
# Normalize ADT data with standard CLR method
onesampledata <- NormalizeData(onesampledata, assay = "ADT", normalization.method = "CLR")
# Check genes names
head(rownames(onesampledata))
# Check data rows and columns
head(onesampledata)
# Generate Percent mito for each of the cells
onesampledata$percent.mito <- PercentageFeatureSet(onesampledata, pattern = "^MT-")
summary(onesampledata$percent.mito)
head(onesampledata)
# Generate Percent ribo for each of the cells
onesampledata$percent.ribo <- PercentageFeatureSet(onesampledata, pattern = "^RP[SL]")
summary(onesampledata$percent.ribo)
head(onesampledata)
# Generate Percent hemoglobin for each of the cells
onesampledata$percent.hb <- PercentageFeatureSet(onesampledata, pattern = "^HB[^(P)]")
summary(onesampledata$percent.hb)
head(onesampledata)
# Check the total number of cells in this sample
table(onesampledata$orig.ident)
# Filter data and keep only cells that has percent.mito less than or equal to 8
onesampledataFilt <- subset(onesampledata, percent.mito <= 8)
# Check the number of cells after mito filtering
table(onesampledataFilt$orig.ident)
# Filter for nCount RNA (total rna molecules sequenced)
onesampledataFilt <- subset(onesampledataFilt, nCount_RNA >= 1000 & nCount_RNA <= 12000)
# Check number of cells after ncount filtering
table(onesampledataFilt$orig.ident)
# Filter for nfeature (total number of genes expressed)
onesampledataFilt <- subset(onesampledataFilt, nFeature_RNA >= 700)
# Filter for percent ribo
onesampledataFilt <- subset(onesampledataFilt, percent.ribo > 10 & percent.ribo < 45)
# Counts after filtering
table(onesampledataFilt$orig.ident)
################################ Doublet Prediction
###################### Normalize Data
# Feature counts for each cell are divided by the total counts for that cell and multiplied by the scale.factor. This is then natural-log transformed
# Cell level normalization - accounts for sequencing depth
onesampledataFilt <- NormalizeData(
object = onesampledataFilt,
normalization.method = "LogNormalize",
scale.factor = 10000)
###################### Find variable genes
# Returns top 2000 variable genes
# VST is uses LOESS method
# FindVariableFeatures needs normalization
onesampledataFilt <- FindVariableFeatures(
object = onesampledataFilt,
selection.method = "vst")
##################### SCALE DATA
# PCA needs scaled data
# Zero centers and scales (mean/sd) gene/feature data in each cell (for across sample comparison), so extreme ranges in expression do not affect clustering (done for making cells with similar expression cluster together)
onesampledataFilt <- ScaleData(
object = onesampledataFilt,
features=VariableFeatures(onesampledataFilt))
##################### PCA and UMAP
# Run PCA with 50 pcs
onesampledataFilt <- RunPCA(
object = onesampledataFilt, npcs=50, verbose = T)
onesampledataFilt <- RunUMAP(
onesampledataFilt, dims = 1:10, verbose = F)
# Set expected 7% doublets
nExp <- round(ncol(onesampledataFilt) * 0.07)
# Find doublets
onesampledataFilt <- doubletFinder_v3(onesampledataFilt, pN = 0.25, pK = 0.09, nExp = nExp, PCs = 1:10)
# Name of the DF prediction can change, so extract the correct column name.
DF.name = colnames(onesampledataFilt@meta.data)[grepl("DF.classification", colnames(onesampledataFilt@meta.data))]
# Filter out doublets, save singlet data on to the list
onesampledataFilt = onesampledataFilt[, onesampledataFilt@meta.data[, DF.name] == "Singlet"]
# Total cells count after filtering
table(Idents(onesampledataFilt))
# Add filtered and unfiltered data to a list
allsamplestomerge[[i]]=onesampledataFilt
allsamplestomergeprefilter[[i]]=onesampledata
}
## [1] "NS3R189BTS"
## [1] "Creating 7036 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "NS7R65BBTS"
## [1] "Creating 3785 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
# Create one and the rest of the samples as a list, for filtered data
one=allsamplestomerge[[1]]
one
## An object of class Seurat
## 36686 features across 19630 samples within 2 assays
## Active assay: RNA (36601 features, 2000 variable features)
## 1 other assay present: ADT
## 2 dimensional reductions calculated: pca, umap
rest=allsamplestomerge[2:length(allsamplestomerge)]
rest
## $NS7R65BBTS
## An object of class Seurat
## 36686 features across 10559 samples within 2 assays
## Active assay: RNA (36601 features, 2000 variable features)
## 1 other assay present: ADT
## 2 dimensional reductions calculated: pca, umap
# Create one and the rest of the samples as a list, for prefiltered data
onepre=allsamplestomergeprefilter[[1]]
onepre
## An object of class Seurat
## 36686 features across 24781 samples within 2 assays
## Active assay: RNA (36601 features, 0 variable features)
## 1 other assay present: ADT
restpre=allsamplestomergeprefilter[2:length(allsamplestomergeprefilter)]
restpre
## $NS7R65BBTS
## An object of class Seurat
## 36686 features across 14223 samples within 2 assays
## Active assay: RNA (36601 features, 0 variable features)
## 1 other assay present: ADT
# Merge filtered seurat objects
allsamples <- merge(one, y = rest, project = "UV")
allsamples
## An object of class Seurat
## 36686 features across 30189 samples within 2 assays
## Active assay: RNA (36601 features, 0 variable features)
## 1 other assay present: ADT
head(allsamples)
## orig.ident nCount_RNA nFeature_RNA nCount_ADT
## AAACCCACACTGGAAG-NS3R189BTS NS3R189BTS 2676 1078 1191
## AAACCCACATGACGTT-NS3R189BTS NS3R189BTS 2329 1271 1135
## AAACCCAGTCCAAATC-NS3R189BTS NS3R189BTS 7926 2660 2034
## AAACCCAGTGAGTTTC-NS3R189BTS NS3R189BTS 2339 1291 1058
## AAACCCAGTTCTTAGG-NS3R189BTS NS3R189BTS 3537 1608 936
## AAACCCATCAGCTGAT-NS3R189BTS NS3R189BTS 1758 948 1375
## AAACCCATCATTCACT-NS3R189BTS NS3R189BTS 1478 930 186
## AAACCCATCCGGACGT-NS3R189BTS NS3R189BTS 2045 1003 883
## AAACCCATCCGTGGTG-NS3R189BTS NS3R189BTS 5150 2003 2014
## AAACCCATCCTCTCTT-NS3R189BTS NS3R189BTS 2287 1306 645
## nFeature_ADT percent.mito percent.ribo percent.hb
## AAACCCACACTGGAAG-NS3R189BTS 81 0.8221226 42.52616 0.00000000
## AAACCCACATGACGTT-NS3R189BTS 84 4.3795620 18.97810 0.04293688
## AAACCCAGTCCAAATC-NS3R189BTS 81 4.6177139 30.05299 0.02523341
## AAACCCAGTGAGTTTC-NS3R189BTS 79 5.3014109 21.67593 0.00000000
## AAACCCAGTTCTTAGG-NS3R189BTS 78 2.2900763 23.35312 0.00000000
## AAACCCATCAGCTGAT-NS3R189BTS 84 5.1194539 26.67804 0.00000000
## AAACCCATCATTCACT-NS3R189BTS 48 2.6387009 24.56022 0.00000000
## AAACCCATCCGGACGT-NS3R189BTS 78 3.4229829 30.66015 0.00000000
## AAACCCATCCGTGGTG-NS3R189BTS 82 0.6019417 35.28155 0.00000000
## AAACCCATCCTCTCTT-NS3R189BTS 83 2.9296021 19.45780 0.00000000
## pANN_0.25_0.09_1477
## AAACCCACACTGGAAG-NS3R189BTS 0.04303198
## AAACCCACATGACGTT-NS3R189BTS 0.33004343
## AAACCCAGTCCAAATC-NS3R189BTS 0.17094355
## AAACCCAGTGAGTTTC-NS3R189BTS 0.31346230
## AAACCCAGTTCTTAGG-NS3R189BTS 0.30990920
## AAACCCATCAGCTGAT-NS3R189BTS 0.16818002
## AAACCCATCATTCACT-NS3R189BTS 0.23884722
## AAACCCATCCGGACGT-NS3R189BTS 0.05132254
## AAACCCATCCGTGGTG-NS3R189BTS 0.13896565
## AAACCCATCCTCTCTT-NS3R189BTS 0.29411765
## DF.classifications_0.25_0.09_1477
## AAACCCACACTGGAAG-NS3R189BTS Singlet
## AAACCCACATGACGTT-NS3R189BTS Singlet
## AAACCCAGTCCAAATC-NS3R189BTS Singlet
## AAACCCAGTGAGTTTC-NS3R189BTS Singlet
## AAACCCAGTTCTTAGG-NS3R189BTS Singlet
## AAACCCATCAGCTGAT-NS3R189BTS Singlet
## AAACCCATCATTCACT-NS3R189BTS Singlet
## AAACCCATCCGGACGT-NS3R189BTS Singlet
## AAACCCATCCGTGGTG-NS3R189BTS Singlet
## AAACCCATCCTCTCTT-NS3R189BTS Singlet
## pANN_0.25_0.09_795 DF.classifications_0.25_0.09_795
## AAACCCACACTGGAAG-NS3R189BTS NA <NA>
## AAACCCACATGACGTT-NS3R189BTS NA <NA>
## AAACCCAGTCCAAATC-NS3R189BTS NA <NA>
## AAACCCAGTGAGTTTC-NS3R189BTS NA <NA>
## AAACCCAGTTCTTAGG-NS3R189BTS NA <NA>
## AAACCCATCAGCTGAT-NS3R189BTS NA <NA>
## AAACCCATCATTCACT-NS3R189BTS NA <NA>
## AAACCCATCCGGACGT-NS3R189BTS NA <NA>
## AAACCCATCCGTGGTG-NS3R189BTS NA <NA>
## AAACCCATCCTCTCTT-NS3R189BTS NA <NA>
table(Idents(allsamples))
##
## NS3R189BTS NS7R65BBTS
## 19630 10559
# Merge prefiltered seurat objects
allsamplespre <- merge(onepre, y = restpre, project = "UV")
allsamplespre
## An object of class Seurat
## 36686 features across 39004 samples within 2 assays
## Active assay: RNA (36601 features, 0 variable features)
## 1 other assay present: ADT
head(allsamplespre)
## orig.ident nCount_RNA nFeature_RNA nCount_ADT
## AAACCCACAAGTCATC-NS3R189BTS NS3R189BTS 2431 1431 611
## AAACCCACACTGGAAG-NS3R189BTS NS3R189BTS 2676 1078 1191
## AAACCCACAGCACACC-NS3R189BTS NS3R189BTS 3394 1760 669
## AAACCCACAGGAGGAG-NS3R189BTS NS3R189BTS 2160 1279 1274
## AAACCCACATGACGTT-NS3R189BTS NS3R189BTS 2329 1271 1135
## AAACCCAGTAGTTACC-NS3R189BTS NS3R189BTS 5362 2261 2976
## AAACCCAGTCCAAATC-NS3R189BTS NS3R189BTS 7926 2660 2034
## AAACCCAGTCCATACA-NS3R189BTS NS3R189BTS 2239 1283 545
## AAACCCAGTGAGTTTC-NS3R189BTS NS3R189BTS 2339 1291 1058
## AAACCCAGTTCTTAGG-NS3R189BTS NS3R189BTS 3537 1608 936
## nFeature_ADT percent.mito percent.ribo percent.hb
## AAACCCACAAGTCATC-NS3R189BTS 77 4.1135335 18.510901 0.00000000
## AAACCCACACTGGAAG-NS3R189BTS 81 0.8221226 42.526158 0.00000000
## AAACCCACAGCACACC-NS3R189BTS 81 5.1856217 19.151444 0.00000000
## AAACCCACAGGAGGAG-NS3R189BTS 81 6.5277778 8.842593 0.00000000
## AAACCCACATGACGTT-NS3R189BTS 84 4.3795620 18.978102 0.04293688
## AAACCCAGTAGTTACC-NS3R189BTS 85 3.8418501 18.985453 0.01864976
## AAACCCAGTCCAAATC-NS3R189BTS 81 4.6177139 30.052990 0.02523341
## AAACCCAGTCCATACA-NS3R189BTS 81 5.0468959 19.830281 0.04466280
## AAACCCAGTGAGTTTC-NS3R189BTS 79 5.3014109 21.675930 0.00000000
## AAACCCAGTTCTTAGG-NS3R189BTS 78 2.2900763 23.353124 0.00000000
table(Idents(allsamplespre))
##
## NS3R189BTS NS7R65BBTS
## 24781 14223
# Assign batch ids to samples
allsamples$batch <- plyr::mapvalues(
x = allsamples$orig.ident,
from = sampleinformation$submitted_name,
to = sampleinformation$SampleBatch
)
head(allsamples)
## orig.ident nCount_RNA nFeature_RNA nCount_ADT
## AAACCCACACTGGAAG-NS3R189BTS NS3R189BTS 2676 1078 1191
## AAACCCACATGACGTT-NS3R189BTS NS3R189BTS 2329 1271 1135
## AAACCCAGTCCAAATC-NS3R189BTS NS3R189BTS 7926 2660 2034
## AAACCCAGTGAGTTTC-NS3R189BTS NS3R189BTS 2339 1291 1058
## AAACCCAGTTCTTAGG-NS3R189BTS NS3R189BTS 3537 1608 936
## AAACCCATCAGCTGAT-NS3R189BTS NS3R189BTS 1758 948 1375
## AAACCCATCATTCACT-NS3R189BTS NS3R189BTS 1478 930 186
## AAACCCATCCGGACGT-NS3R189BTS NS3R189BTS 2045 1003 883
## AAACCCATCCGTGGTG-NS3R189BTS NS3R189BTS 5150 2003 2014
## AAACCCATCCTCTCTT-NS3R189BTS NS3R189BTS 2287 1306 645
## nFeature_ADT percent.mito percent.ribo percent.hb
## AAACCCACACTGGAAG-NS3R189BTS 81 0.8221226 42.52616 0.00000000
## AAACCCACATGACGTT-NS3R189BTS 84 4.3795620 18.97810 0.04293688
## AAACCCAGTCCAAATC-NS3R189BTS 81 4.6177139 30.05299 0.02523341
## AAACCCAGTGAGTTTC-NS3R189BTS 79 5.3014109 21.67593 0.00000000
## AAACCCAGTTCTTAGG-NS3R189BTS 78 2.2900763 23.35312 0.00000000
## AAACCCATCAGCTGAT-NS3R189BTS 84 5.1194539 26.67804 0.00000000
## AAACCCATCATTCACT-NS3R189BTS 48 2.6387009 24.56022 0.00000000
## AAACCCATCCGGACGT-NS3R189BTS 78 3.4229829 30.66015 0.00000000
## AAACCCATCCGTGGTG-NS3R189BTS 82 0.6019417 35.28155 0.00000000
## AAACCCATCCTCTCTT-NS3R189BTS 83 2.9296021 19.45780 0.00000000
## pANN_0.25_0.09_1477
## AAACCCACACTGGAAG-NS3R189BTS 0.04303198
## AAACCCACATGACGTT-NS3R189BTS 0.33004343
## AAACCCAGTCCAAATC-NS3R189BTS 0.17094355
## AAACCCAGTGAGTTTC-NS3R189BTS 0.31346230
## AAACCCAGTTCTTAGG-NS3R189BTS 0.30990920
## AAACCCATCAGCTGAT-NS3R189BTS 0.16818002
## AAACCCATCATTCACT-NS3R189BTS 0.23884722
## AAACCCATCCGGACGT-NS3R189BTS 0.05132254
## AAACCCATCCGTGGTG-NS3R189BTS 0.13896565
## AAACCCATCCTCTCTT-NS3R189BTS 0.29411765
## DF.classifications_0.25_0.09_1477
## AAACCCACACTGGAAG-NS3R189BTS Singlet
## AAACCCACATGACGTT-NS3R189BTS Singlet
## AAACCCAGTCCAAATC-NS3R189BTS Singlet
## AAACCCAGTGAGTTTC-NS3R189BTS Singlet
## AAACCCAGTTCTTAGG-NS3R189BTS Singlet
## AAACCCATCAGCTGAT-NS3R189BTS Singlet
## AAACCCATCATTCACT-NS3R189BTS Singlet
## AAACCCATCCGGACGT-NS3R189BTS Singlet
## AAACCCATCCGTGGTG-NS3R189BTS Singlet
## AAACCCATCCTCTCTT-NS3R189BTS Singlet
## pANN_0.25_0.09_795 DF.classifications_0.25_0.09_795
## AAACCCACACTGGAAG-NS3R189BTS NA <NA>
## AAACCCACATGACGTT-NS3R189BTS NA <NA>
## AAACCCAGTCCAAATC-NS3R189BTS NA <NA>
## AAACCCAGTGAGTTTC-NS3R189BTS NA <NA>
## AAACCCAGTTCTTAGG-NS3R189BTS NA <NA>
## AAACCCATCAGCTGAT-NS3R189BTS NA <NA>
## AAACCCATCATTCACT-NS3R189BTS NA <NA>
## AAACCCATCCGGACGT-NS3R189BTS NA <NA>
## AAACCCATCCGTGGTG-NS3R189BTS NA <NA>
## AAACCCATCCTCTCTT-NS3R189BTS NA <NA>
## batch
## AAACCCACACTGGAAG-NS3R189BTS 1
## AAACCCACATGACGTT-NS3R189BTS 1
## AAACCCAGTCCAAATC-NS3R189BTS 1
## AAACCCAGTGAGTTTC-NS3R189BTS 1
## AAACCCAGTTCTTAGG-NS3R189BTS 1
## AAACCCATCAGCTGAT-NS3R189BTS 1
## AAACCCATCATTCACT-NS3R189BTS 1
## AAACCCATCCGGACGT-NS3R189BTS 1
## AAACCCATCCGTGGTG-NS3R189BTS 1
## AAACCCATCCTCTCTT-NS3R189BTS 1
allsamplespre$batch <- plyr::mapvalues(
x = allsamplespre$orig.ident,
from = sampleinformation$submitted_name,
to = sampleinformation$SampleBatch
)
head(allsamplespre)
## orig.ident nCount_RNA nFeature_RNA nCount_ADT
## AAACCCACAAGTCATC-NS3R189BTS NS3R189BTS 2431 1431 611
## AAACCCACACTGGAAG-NS3R189BTS NS3R189BTS 2676 1078 1191
## AAACCCACAGCACACC-NS3R189BTS NS3R189BTS 3394 1760 669
## AAACCCACAGGAGGAG-NS3R189BTS NS3R189BTS 2160 1279 1274
## AAACCCACATGACGTT-NS3R189BTS NS3R189BTS 2329 1271 1135
## AAACCCAGTAGTTACC-NS3R189BTS NS3R189BTS 5362 2261 2976
## AAACCCAGTCCAAATC-NS3R189BTS NS3R189BTS 7926 2660 2034
## AAACCCAGTCCATACA-NS3R189BTS NS3R189BTS 2239 1283 545
## AAACCCAGTGAGTTTC-NS3R189BTS NS3R189BTS 2339 1291 1058
## AAACCCAGTTCTTAGG-NS3R189BTS NS3R189BTS 3537 1608 936
## nFeature_ADT percent.mito percent.ribo percent.hb
## AAACCCACAAGTCATC-NS3R189BTS 77 4.1135335 18.510901 0.00000000
## AAACCCACACTGGAAG-NS3R189BTS 81 0.8221226 42.526158 0.00000000
## AAACCCACAGCACACC-NS3R189BTS 81 5.1856217 19.151444 0.00000000
## AAACCCACAGGAGGAG-NS3R189BTS 81 6.5277778 8.842593 0.00000000
## AAACCCACATGACGTT-NS3R189BTS 84 4.3795620 18.978102 0.04293688
## AAACCCAGTAGTTACC-NS3R189BTS 85 3.8418501 18.985453 0.01864976
## AAACCCAGTCCAAATC-NS3R189BTS 81 4.6177139 30.052990 0.02523341
## AAACCCAGTCCATACA-NS3R189BTS 81 5.0468959 19.830281 0.04466280
## AAACCCAGTGAGTTTC-NS3R189BTS 79 5.3014109 21.675930 0.00000000
## AAACCCAGTTCTTAGG-NS3R189BTS 78 2.2900763 23.353124 0.00000000
## batch
## AAACCCACAAGTCATC-NS3R189BTS 1
## AAACCCACACTGGAAG-NS3R189BTS 1
## AAACCCACAGCACACC-NS3R189BTS 1
## AAACCCACAGGAGGAG-NS3R189BTS 1
## AAACCCACATGACGTT-NS3R189BTS 1
## AAACCCAGTAGTTACC-NS3R189BTS 1
## AAACCCAGTCCAAATC-NS3R189BTS 1
## AAACCCAGTCCATACA-NS3R189BTS 1
## AAACCCAGTGAGTTTC-NS3R189BTS 1
## AAACCCAGTTCTTAGG-NS3R189BTS 1
# Assign sample types
allsamples$type <- plyr::mapvalues(
x = allsamples$orig.ident,
from = sampleinformation$submitted_name,
to = sampleinformation$SampleType
)
head(allsamples)
## orig.ident nCount_RNA nFeature_RNA nCount_ADT
## AAACCCACACTGGAAG-NS3R189BTS NS3R189BTS 2676 1078 1191
## AAACCCACATGACGTT-NS3R189BTS NS3R189BTS 2329 1271 1135
## AAACCCAGTCCAAATC-NS3R189BTS NS3R189BTS 7926 2660 2034
## AAACCCAGTGAGTTTC-NS3R189BTS NS3R189BTS 2339 1291 1058
## AAACCCAGTTCTTAGG-NS3R189BTS NS3R189BTS 3537 1608 936
## AAACCCATCAGCTGAT-NS3R189BTS NS3R189BTS 1758 948 1375
## AAACCCATCATTCACT-NS3R189BTS NS3R189BTS 1478 930 186
## AAACCCATCCGGACGT-NS3R189BTS NS3R189BTS 2045 1003 883
## AAACCCATCCGTGGTG-NS3R189BTS NS3R189BTS 5150 2003 2014
## AAACCCATCCTCTCTT-NS3R189BTS NS3R189BTS 2287 1306 645
## nFeature_ADT percent.mito percent.ribo percent.hb
## AAACCCACACTGGAAG-NS3R189BTS 81 0.8221226 42.52616 0.00000000
## AAACCCACATGACGTT-NS3R189BTS 84 4.3795620 18.97810 0.04293688
## AAACCCAGTCCAAATC-NS3R189BTS 81 4.6177139 30.05299 0.02523341
## AAACCCAGTGAGTTTC-NS3R189BTS 79 5.3014109 21.67593 0.00000000
## AAACCCAGTTCTTAGG-NS3R189BTS 78 2.2900763 23.35312 0.00000000
## AAACCCATCAGCTGAT-NS3R189BTS 84 5.1194539 26.67804 0.00000000
## AAACCCATCATTCACT-NS3R189BTS 48 2.6387009 24.56022 0.00000000
## AAACCCATCCGGACGT-NS3R189BTS 78 3.4229829 30.66015 0.00000000
## AAACCCATCCGTGGTG-NS3R189BTS 82 0.6019417 35.28155 0.00000000
## AAACCCATCCTCTCTT-NS3R189BTS 83 2.9296021 19.45780 0.00000000
## pANN_0.25_0.09_1477
## AAACCCACACTGGAAG-NS3R189BTS 0.04303198
## AAACCCACATGACGTT-NS3R189BTS 0.33004343
## AAACCCAGTCCAAATC-NS3R189BTS 0.17094355
## AAACCCAGTGAGTTTC-NS3R189BTS 0.31346230
## AAACCCAGTTCTTAGG-NS3R189BTS 0.30990920
## AAACCCATCAGCTGAT-NS3R189BTS 0.16818002
## AAACCCATCATTCACT-NS3R189BTS 0.23884722
## AAACCCATCCGGACGT-NS3R189BTS 0.05132254
## AAACCCATCCGTGGTG-NS3R189BTS 0.13896565
## AAACCCATCCTCTCTT-NS3R189BTS 0.29411765
## DF.classifications_0.25_0.09_1477
## AAACCCACACTGGAAG-NS3R189BTS Singlet
## AAACCCACATGACGTT-NS3R189BTS Singlet
## AAACCCAGTCCAAATC-NS3R189BTS Singlet
## AAACCCAGTGAGTTTC-NS3R189BTS Singlet
## AAACCCAGTTCTTAGG-NS3R189BTS Singlet
## AAACCCATCAGCTGAT-NS3R189BTS Singlet
## AAACCCATCATTCACT-NS3R189BTS Singlet
## AAACCCATCCGGACGT-NS3R189BTS Singlet
## AAACCCATCCGTGGTG-NS3R189BTS Singlet
## AAACCCATCCTCTCTT-NS3R189BTS Singlet
## pANN_0.25_0.09_795 DF.classifications_0.25_0.09_795
## AAACCCACACTGGAAG-NS3R189BTS NA <NA>
## AAACCCACATGACGTT-NS3R189BTS NA <NA>
## AAACCCAGTCCAAATC-NS3R189BTS NA <NA>
## AAACCCAGTGAGTTTC-NS3R189BTS NA <NA>
## AAACCCAGTTCTTAGG-NS3R189BTS NA <NA>
## AAACCCATCAGCTGAT-NS3R189BTS NA <NA>
## AAACCCATCATTCACT-NS3R189BTS NA <NA>
## AAACCCATCCGGACGT-NS3R189BTS NA <NA>
## AAACCCATCCGTGGTG-NS3R189BTS NA <NA>
## AAACCCATCCTCTCTT-NS3R189BTS NA <NA>
## batch type
## AAACCCACACTGGAAG-NS3R189BTS 1 HC
## AAACCCACATGACGTT-NS3R189BTS 1 HC
## AAACCCAGTCCAAATC-NS3R189BTS 1 HC
## AAACCCAGTGAGTTTC-NS3R189BTS 1 HC
## AAACCCAGTTCTTAGG-NS3R189BTS 1 HC
## AAACCCATCAGCTGAT-NS3R189BTS 1 HC
## AAACCCATCATTCACT-NS3R189BTS 1 HC
## AAACCCATCCGGACGT-NS3R189BTS 1 HC
## AAACCCATCCGTGGTG-NS3R189BTS 1 HC
## AAACCCATCCTCTCTT-NS3R189BTS 1 HC
allsamplespre$type <- plyr::mapvalues(
x = allsamplespre$orig.ident,
from = sampleinformation$submitted_name,
to = sampleinformation$SampleType
)
head(allsamplespre)
## orig.ident nCount_RNA nFeature_RNA nCount_ADT
## AAACCCACAAGTCATC-NS3R189BTS NS3R189BTS 2431 1431 611
## AAACCCACACTGGAAG-NS3R189BTS NS3R189BTS 2676 1078 1191
## AAACCCACAGCACACC-NS3R189BTS NS3R189BTS 3394 1760 669
## AAACCCACAGGAGGAG-NS3R189BTS NS3R189BTS 2160 1279 1274
## AAACCCACATGACGTT-NS3R189BTS NS3R189BTS 2329 1271 1135
## AAACCCAGTAGTTACC-NS3R189BTS NS3R189BTS 5362 2261 2976
## AAACCCAGTCCAAATC-NS3R189BTS NS3R189BTS 7926 2660 2034
## AAACCCAGTCCATACA-NS3R189BTS NS3R189BTS 2239 1283 545
## AAACCCAGTGAGTTTC-NS3R189BTS NS3R189BTS 2339 1291 1058
## AAACCCAGTTCTTAGG-NS3R189BTS NS3R189BTS 3537 1608 936
## nFeature_ADT percent.mito percent.ribo percent.hb
## AAACCCACAAGTCATC-NS3R189BTS 77 4.1135335 18.510901 0.00000000
## AAACCCACACTGGAAG-NS3R189BTS 81 0.8221226 42.526158 0.00000000
## AAACCCACAGCACACC-NS3R189BTS 81 5.1856217 19.151444 0.00000000
## AAACCCACAGGAGGAG-NS3R189BTS 81 6.5277778 8.842593 0.00000000
## AAACCCACATGACGTT-NS3R189BTS 84 4.3795620 18.978102 0.04293688
## AAACCCAGTAGTTACC-NS3R189BTS 85 3.8418501 18.985453 0.01864976
## AAACCCAGTCCAAATC-NS3R189BTS 81 4.6177139 30.052990 0.02523341
## AAACCCAGTCCATACA-NS3R189BTS 81 5.0468959 19.830281 0.04466280
## AAACCCAGTGAGTTTC-NS3R189BTS 79 5.3014109 21.675930 0.00000000
## AAACCCAGTTCTTAGG-NS3R189BTS 78 2.2900763 23.353124 0.00000000
## batch type
## AAACCCACAAGTCATC-NS3R189BTS 1 HC
## AAACCCACACTGGAAG-NS3R189BTS 1 HC
## AAACCCACAGCACACC-NS3R189BTS 1 HC
## AAACCCACAGGAGGAG-NS3R189BTS 1 HC
## AAACCCACATGACGTT-NS3R189BTS 1 HC
## AAACCCAGTAGTTACC-NS3R189BTS 1 HC
## AAACCCAGTCCAAATC-NS3R189BTS 1 HC
## AAACCCAGTCCATACA-NS3R189BTS 1 HC
## AAACCCAGTGAGTTTC-NS3R189BTS 1 HC
## AAACCCAGTTCTTAGG-NS3R189BTS 1 HC
# View type counts
table(allsamples$type)
##
## BCR-UV HC
## 10559 19630
table(allsamplespre$type)
##
## BCR-UV HC
## 14223 24781
# View batch counts
table(allsamples$batch)
##
## 1 4
## 19630 10559
table(allsamplespre$batch)
##
## 1 4
## 24781 14223
# View available slots
slotNames(allsamples)
## [1] "assays" "meta.data" "active.assay" "active.ident" "graphs"
## [6] "neighbors" "reductions" "images" "project.name" "misc"
## [11] "version" "commands" "tools"
# View data
head(allsamples)
## orig.ident nCount_RNA nFeature_RNA nCount_ADT
## AAACCCACACTGGAAG-NS3R189BTS NS3R189BTS 2676 1078 1191
## AAACCCACATGACGTT-NS3R189BTS NS3R189BTS 2329 1271 1135
## AAACCCAGTCCAAATC-NS3R189BTS NS3R189BTS 7926 2660 2034
## AAACCCAGTGAGTTTC-NS3R189BTS NS3R189BTS 2339 1291 1058
## AAACCCAGTTCTTAGG-NS3R189BTS NS3R189BTS 3537 1608 936
## AAACCCATCAGCTGAT-NS3R189BTS NS3R189BTS 1758 948 1375
## AAACCCATCATTCACT-NS3R189BTS NS3R189BTS 1478 930 186
## AAACCCATCCGGACGT-NS3R189BTS NS3R189BTS 2045 1003 883
## AAACCCATCCGTGGTG-NS3R189BTS NS3R189BTS 5150 2003 2014
## AAACCCATCCTCTCTT-NS3R189BTS NS3R189BTS 2287 1306 645
## nFeature_ADT percent.mito percent.ribo percent.hb
## AAACCCACACTGGAAG-NS3R189BTS 81 0.8221226 42.52616 0.00000000
## AAACCCACATGACGTT-NS3R189BTS 84 4.3795620 18.97810 0.04293688
## AAACCCAGTCCAAATC-NS3R189BTS 81 4.6177139 30.05299 0.02523341
## AAACCCAGTGAGTTTC-NS3R189BTS 79 5.3014109 21.67593 0.00000000
## AAACCCAGTTCTTAGG-NS3R189BTS 78 2.2900763 23.35312 0.00000000
## AAACCCATCAGCTGAT-NS3R189BTS 84 5.1194539 26.67804 0.00000000
## AAACCCATCATTCACT-NS3R189BTS 48 2.6387009 24.56022 0.00000000
## AAACCCATCCGGACGT-NS3R189BTS 78 3.4229829 30.66015 0.00000000
## AAACCCATCCGTGGTG-NS3R189BTS 82 0.6019417 35.28155 0.00000000
## AAACCCATCCTCTCTT-NS3R189BTS 83 2.9296021 19.45780 0.00000000
## pANN_0.25_0.09_1477
## AAACCCACACTGGAAG-NS3R189BTS 0.04303198
## AAACCCACATGACGTT-NS3R189BTS 0.33004343
## AAACCCAGTCCAAATC-NS3R189BTS 0.17094355
## AAACCCAGTGAGTTTC-NS3R189BTS 0.31346230
## AAACCCAGTTCTTAGG-NS3R189BTS 0.30990920
## AAACCCATCAGCTGAT-NS3R189BTS 0.16818002
## AAACCCATCATTCACT-NS3R189BTS 0.23884722
## AAACCCATCCGGACGT-NS3R189BTS 0.05132254
## AAACCCATCCGTGGTG-NS3R189BTS 0.13896565
## AAACCCATCCTCTCTT-NS3R189BTS 0.29411765
## DF.classifications_0.25_0.09_1477
## AAACCCACACTGGAAG-NS3R189BTS Singlet
## AAACCCACATGACGTT-NS3R189BTS Singlet
## AAACCCAGTCCAAATC-NS3R189BTS Singlet
## AAACCCAGTGAGTTTC-NS3R189BTS Singlet
## AAACCCAGTTCTTAGG-NS3R189BTS Singlet
## AAACCCATCAGCTGAT-NS3R189BTS Singlet
## AAACCCATCATTCACT-NS3R189BTS Singlet
## AAACCCATCCGGACGT-NS3R189BTS Singlet
## AAACCCATCCGTGGTG-NS3R189BTS Singlet
## AAACCCATCCTCTCTT-NS3R189BTS Singlet
## pANN_0.25_0.09_795 DF.classifications_0.25_0.09_795
## AAACCCACACTGGAAG-NS3R189BTS NA <NA>
## AAACCCACATGACGTT-NS3R189BTS NA <NA>
## AAACCCAGTCCAAATC-NS3R189BTS NA <NA>
## AAACCCAGTGAGTTTC-NS3R189BTS NA <NA>
## AAACCCAGTTCTTAGG-NS3R189BTS NA <NA>
## AAACCCATCAGCTGAT-NS3R189BTS NA <NA>
## AAACCCATCATTCACT-NS3R189BTS NA <NA>
## AAACCCATCCGGACGT-NS3R189BTS NA <NA>
## AAACCCATCCGTGGTG-NS3R189BTS NA <NA>
## AAACCCATCCTCTCTT-NS3R189BTS NA <NA>
## batch type
## AAACCCACACTGGAAG-NS3R189BTS 1 HC
## AAACCCACATGACGTT-NS3R189BTS 1 HC
## AAACCCAGTCCAAATC-NS3R189BTS 1 HC
## AAACCCAGTGAGTTTC-NS3R189BTS 1 HC
## AAACCCAGTTCTTAGG-NS3R189BTS 1 HC
## AAACCCATCAGCTGAT-NS3R189BTS 1 HC
## AAACCCATCATTCACT-NS3R189BTS 1 HC
## AAACCCATCCGGACGT-NS3R189BTS 1 HC
## AAACCCATCCGTGGTG-NS3R189BTS 1 HC
## AAACCCATCCTCTCTT-NS3R189BTS 1 HC
# Cleanup any old metadata columns from doublet finder
pANN.name = as.vector(colnames(allsamples@meta.data)[grepl("pANN", colnames(allsamples@meta.data))])
for (i in 1:length(pANN.name))
{
allsamples@meta.data[, pANN.name[i]]<-NULL
}
DFclass.name = as.vector(colnames(allsamples@meta.data)[grepl("DF.classification", colnames(allsamples@meta.data))])
for (i in 1:length(DFclass.name))
{
allsamples@meta.data[, DFclass.name[i]]<-NULL
}
# Generate before filtering quality plots
qcbefore=VlnPlot(allsamplespre,features = c("nFeature_RNA", "nCount_RNA","percent.mito","percent.ribo"),ncol = 2, pt.size = 0) +
NoLegend()
qcbefore
# Save the plot in current working directory
ggsave(paste(outputpath,"before-qc-violineplot.pdf",sep=""), plot = qcbefore, width = 15, height = 20, units = "cm")
# Generate after filtering quality plots
qcafter=VlnPlot(allsamples,features = c("nFeature_RNA", "nCount_RNA","percent.mito","percent.ribo"),ncol = 2, pt.size = 0) +
NoLegend()
qcafter
# Save the plot in current working directory
ggsave(paste(outputpath,"after-qc-violineplot.pdf",sep=""), plot = qcafter, width = 15, height = 20, units = "cm")
# Save QC session data in the working directory
save.image(file=paste(outputpath,"SeuratObjectAfterQc.RData",sep=""))
# Reference
# Doublet prediction
# https://nbisweden.github.io/workshop-scRNAseq/labs/compiled/seurat/seurat_01_qc.html#Predict_doublets
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/intel/compilers_and_libraries_2020.2.254/linux/mkl/lib/intel64_lin/libmkl_rt.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] KernSmooth_2.23-20 fields_14.1 viridis_0.6.2
## [4] viridisLite_0.4.1 spam_2.9-1 plotly_4.10.1
## [7] ggplot2_3.4.0 SeuratObject_4.1.3 Seurat_4.2.1
## [10] DoubletFinder_2.0.3
##
## loaded via a namespace (and not attached):
## [1] systemfonts_1.0.4 plyr_1.8.7 igraph_1.3.5
## [4] lazyeval_0.2.2 sp_1.5-1 splines_4.2.2
## [7] listenv_0.8.0 scattermore_0.8 digest_0.6.30
## [10] htmltools_0.5.3 fansi_1.0.3 magrittr_2.0.3
## [13] tensor_1.5 cluster_2.1.4 ROCR_1.0-11
## [16] remotes_2.4.2 globals_0.16.1 matrixStats_0.62.0
## [19] spatstat.sparse_3.0-0 colorspace_2.0-3 ggrepel_0.9.2
## [22] textshaping_0.3.6 xfun_0.34 dplyr_1.0.10
## [25] crayon_1.5.2 jsonlite_1.8.3 progressr_0.11.0
## [28] spatstat.data_3.0-0 survival_3.4-0 zoo_1.8-11
## [31] glue_1.6.2 polyclip_1.10-4 gtable_0.3.1
## [34] leiden_0.4.3 future.apply_1.10.0 maps_3.4.1
## [37] abind_1.4-5 scales_1.2.1 DBI_1.1.3
## [40] spatstat.random_3.0-1 miniUI_0.1.1.1 Rcpp_1.0.9
## [43] xtable_1.8-4 reticulate_1.26 bit_4.0.4
## [46] dotCall64_1.0-2 htmlwidgets_1.5.4 httr_1.4.4
## [49] RColorBrewer_1.1-3 ellipsis_0.3.2 ica_1.0-3
## [52] pkgconfig_2.0.3 farver_2.1.1 sass_0.4.2
## [55] uwot_0.1.14 deldir_1.0-6 utf8_1.2.2
## [58] tidyselect_1.2.0 labeling_0.4.2 rlang_1.0.6
## [61] reshape2_1.4.4 later_1.3.0 munsell_0.5.0
## [64] tools_4.2.2 cachem_1.0.6 cli_3.4.1
## [67] generics_0.1.3 ggridges_0.5.4 evaluate_0.18
## [70] stringr_1.4.1 fastmap_1.1.0 yaml_2.3.6
## [73] ragg_1.2.4 goftest_1.2-3 knitr_1.40
## [76] bit64_4.0.5 fitdistrplus_1.1-8 purrr_0.3.5
## [79] RANN_2.6.1 pbapply_1.5-0 future_1.29.0
## [82] nlme_3.1-160 mime_0.12 ggrastr_1.0.1
## [85] hdf5r_1.3.7 compiler_4.2.2 beeswarm_0.4.0
## [88] curl_4.3.3 png_0.1-7 spatstat.utils_3.0-1
## [91] tibble_3.1.8 bslib_0.4.1 stringi_1.7.8
## [94] lattice_0.20-45 Matrix_1.5-3 vctrs_0.5.0
## [97] pillar_1.8.1 lifecycle_1.0.3 spatstat.geom_3.0-3
## [100] lmtest_0.9-40 jquerylib_0.1.4 RcppAnnoy_0.0.20
## [103] data.table_1.14.4 cowplot_1.1.1 irlba_2.3.5.1
## [106] httpuv_1.6.6 patchwork_1.1.2 R6_2.5.1
## [109] promises_1.2.0.1 gridExtra_2.3 vipor_0.4.5
## [112] parallelly_1.32.1 codetools_0.2-18 MASS_7.3-58.1
## [115] assertthat_0.2.1 withr_2.5.0 sctransform_0.3.5
## [118] parallel_4.2.2 grid_4.2.2 tidyr_1.2.1
## [121] rmarkdown_2.18 Rtsne_0.16 spatstat.explore_3.0-5
## [124] shiny_1.7.3 ggbeeswarm_0.6.0
knitr::purl(input="/data/../TotalSeq/Tools/qualityControl.Rmd", output="/data/../TotalSeq/Tools/qualityControl.R")
##
|
| | 0%
|
|.... | 6%
|
|......... | 12%
|
|............. | 19%
|
|.................. | 25%
|
|...................... | 31%
|
|.......................... | 38%
|
|............................... | 44%
|
|................................... | 50%
|
|....................................... | 56%
|
|............................................ | 62%
|
|................................................ | 69%
|
|.................................................... | 75%
|
|......................................................... | 81%
|
|............................................................. | 88%
|
|.................................................................. | 94%
|
|......................................................................| 100%
## [1] "/data/../TotalSeq/Tools/qualityControl.R"
file.copy(from = "/data/../TotalSeq/Tools/qualityControl.html",
to = "/data/../TotalSeq/Reports/site/quality-control.html")
## [1] FALSE
file.remove("/data/../TotalSeq/Tools/qualityControl.html")
## [1] FALSE