Workshop exercises
Day 1
## ====================================
## Independent exercise testing
## ====================================
# --------------------------------------------------------
# Day 1 Exercises: Exploring QC patterns and filtering thresholds
# --------------------------------------------------------
# Day 2 Exercises: Exploring clustering thresholds
# --------------------------------------------------------
# Day 3 Exercises: Exploring annotations and differential expression
# First - clear current Seurat object to free up memory & remove current results
rm(geo_so)
# Then load in integrated data & reset object on each iteration to avoid exceeding allocated space
geo2_so = readRDS('/home/workshop/rcavalca/ISC_R/results/rdata/geo_so_sct_integrated.rds')
# look at elbow plot
ElbowPlot(geo2_so, ndims = 50, reduction = 'unintegrated.sct.pca')
## Clustering
# Round 1: manually adjust number of PCs to include in clustering
#pcs = 20 # increase number of PCs
pcs = 10 # reduce number of PCs
# generate nearest neighbor (graph), using selected number of PCs
geo2_so = FindNeighbors(geo2_so, dims = 1:pcs, reduction = 'integrated.sct.rpca')
# Round 2: adjust resolution after testing PCs (remember this only impacts the how the boundaries of the neighbors are drawn, not the underlying NN graph/structure)
res = 0.4
# res = 1.0
# res = 0.2
# generate clusters, using
geo2_so = FindClusters(geo2_so, resolution = res, cluster.name = 'integrated.sct.rpca.clusters')
# look at meta.data to see cluster labels
head(geo2_so@meta.data)
# Prep for UMAP
geo2_so = RunUMAP(geo2_so, dims = 1:pcs, reduction = 'integrated.sct.rpca',
reduction.name = 'umap.integrated.sct.rpca')
geo2_so
# look at clustering results
post_integration_umap_clusters_testing =
DimPlot(geo2_so, group.by = 'seurat_clusters', label = TRUE,
reduction = 'umap.integrated.sct.rpca') + NoLegend()
post_integration_umap_clusters_testing # look at plot
# output to file, including the number of PCs and resolution used to generate results
ggsave(filename = paste0('results/figures/umap_integrated_sct_clusters',
pcs,'PCs',res,'res.png'),
plot = post_integration_umap_plot_clusters,
width = 8, height = 6, units = 'in')
## generate markers and annotate clusters to see how that changes
# prep for cluster comparisons
geo2_so = SetIdent(geo2_so, value = 'integrated.sct.rpca.clusters')
geo2_so = PrepSCTFindMarkers(geo2_so)
# run comparisons for each cluster to generate markers
geo2_markers = FindAllMarkers(geo2_so, only.pos = TRUE)
# run cell type predictions for current clustering results
library(scCATCH)
# create scCATCH object, using count data
geo2_catch = createscCATCH(data = geo2_so@assays$SCT@counts, cluster = as.character(Idents(geo2_so)))
# add marker genes to use for predictions
catch_markers = geo2_markers %>% rename('logfc' = 'avg_log2FC')
geo2_catch@markergene = geo2_markers
# specify tissues/cell-types from the scCATCH reference
geo2_catch@marker = cellmatch[cellmatch$species == 'Mouse' & cellmatch$tissue %in% c('Blood', 'Peripheral Blood', 'Muscle', 'Skeletal muscle', 'Epidermis', 'Skin'), ]
# run scCATCH to generate predictions
geo2_catch = findcelltype(geo2_catch)
# look at the predictions
geo2_catch@celltype %>% select(cluster, cell_type, celltype_score)
## annotate clusters
# Extract the cell types only to merge into the meta.data
catch_celltypes = geo2_catch@celltype %>% select(cluster, cell_type)
# Merge cell types in but as a new table to slide into @meta.data
new_metadata = geo2_so@meta.data %>% left_join(catch_celltypes,
by = c('integrated.sct.rpca.clusters' = 'cluster'))
rownames(new_metadata) = rownames(geo2_so@meta.data) # We are implicitly relying on the same row order!
# Replace the meta.data
geo2_so@meta.data = new_metadata
head(geo2_so@meta.data)
catch_umap_plot = DimPlot(geo2_so, group.by = 'cell_type',
label = TRUE, reduction = 'umap.integrated.sct.rpca')
catch_umap_plot
#########
## Extension - how might you generate DE comparisons between D21 and D7 for all annotated clusters?
Day 2
## ====================================
## Independent exercise testing
## ====================================
# --------------------------------------------------------
# Day 1 Exercises: Exploring QC patterns and filtering thresholds
# --------------------------------------------------------
# Day 2 Exercises: Exploring clustering thresholds
# --------------------------------------------------------
# Day 3 Exercises: Exploring annotations and differential expression
# First - clear current Seurat object to free up memory & remove current results
rm(geo_so)
# Then load in integrated data & reset object on each iteration to avoid exceeding allocated space
geo2_so = readRDS('/home/workshop/rcavalca/ISC_R/results/rdata/geo_so_sct_integrated.rds')
# look at elbow plot
ElbowPlot(geo2_so, ndims = 50, reduction = 'unintegrated.sct.pca')
## Clustering
# Round 1: manually adjust number of PCs to include in clustering
#pcs = 20 # increase number of PCs
pcs = 10 # reduce number of PCs
# generate nearest neighbor (graph), using selected number of PCs
geo2_so = FindNeighbors(geo2_so, dims = 1:pcs, reduction = 'integrated.sct.rpca')
# Round 2: adjust resolution after testing PCs (remember this only impacts the how the boundaries of the neighbors are drawn, not the underlying NN graph/structure)
res = 0.4
# res = 1.0
# res = 0.2
# generate clusters, using
geo2_so = FindClusters(geo2_so, resolution = res, cluster.name = 'integrated.sct.rpca.clusters')
# look at meta.data to see cluster labels
head(geo2_so@meta.data)
# Prep for UMAP
geo2_so = RunUMAP(geo2_so, dims = 1:pcs, reduction = 'integrated.sct.rpca',
reduction.name = 'umap.integrated.sct.rpca')
geo2_so
# look at clustering results
post_integration_umap_clusters_testing =
DimPlot(geo2_so, group.by = 'seurat_clusters', label = TRUE,
reduction = 'umap.integrated.sct.rpca') + NoLegend()
post_integration_umap_clusters_testing # look at plot
# output to file, including the number of PCs and resolution used to generate results
ggsave(filename = paste0('results/figures/umap_integrated_sct_clusters',
pcs,'PCs',res,'res.png'),
plot = post_integration_umap_plot_clusters,
width = 8, height = 6, units = 'in')
## generate markers and annotate clusters to see how that changes
# prep for cluster comparisons
geo2_so = SetIdent(geo2_so, value = 'integrated.sct.rpca.clusters')
geo2_so = PrepSCTFindMarkers(geo2_so)
# run comparisons for each cluster to generate markers
geo2_markers = FindAllMarkers(geo2_so, only.pos = TRUE)
# run cell type predictions for current clustering results
library(scCATCH)
# create scCATCH object, using count data
geo2_catch = createscCATCH(data = geo2_so@assays$SCT@counts, cluster = as.character(Idents(geo2_so)))
# add marker genes to use for predictions
catch_markers = geo2_markers %>% rename('logfc' = 'avg_log2FC')
geo2_catch@markergene = geo2_markers
# specify tissues/cell-types from the scCATCH reference
geo2_catch@marker = cellmatch[cellmatch$species == 'Mouse' & cellmatch$tissue %in% c('Blood', 'Peripheral Blood', 'Muscle', 'Skeletal muscle', 'Epidermis', 'Skin'), ]
# run scCATCH to generate predictions
geo2_catch = findcelltype(geo2_catch)
# look at the predictions
geo2_catch@celltype %>% select(cluster, cell_type, celltype_score)
## annotate clusters
# Extract the cell types only to merge into the meta.data
catch_celltypes = geo2_catch@celltype %>% select(cluster, cell_type)
# Merge cell types in but as a new table to slide into @meta.data
new_metadata = geo2_so@meta.data %>% left_join(catch_celltypes,
by = c('integrated.sct.rpca.clusters' = 'cluster'))
rownames(new_metadata) = rownames(geo2_so@meta.data) # We are implicitly relying on the same row order!
# Replace the meta.data
geo2_so@meta.data = new_metadata
head(geo2_so@meta.data)
catch_umap_plot = DimPlot(geo2_so, group.by = 'cell_type',
label = TRUE, reduction = 'umap.integrated.sct.rpca')
catch_umap_plot
#########
## Extension - how might you generate DE comparisons between D21 and D7 for all annotated clusters?
Day 3
## ====================================
## Independent exercise testing
## ====================================
# --------------------------------------------------------
# Day 1 Exercises: Exploring QC patterns and filtering thresholds
# --------------------------------------------------------
# Day 2 Exercises: Exploring clustering thresholds
# --------------------------------------------------------
# Day 3 Exercises: Exploring annotations and differential expression
# First - clear current Seurat object to free up memory & remove current results
rm(geo_so)
# Then load in integrated data & reset object on each iteration to avoid exceeding allocated space
geo2_so = readRDS('/home/workshop/rcavalca/ISC_R/results/rdata/geo_so_sct_integrated.rds')
# look at elbow plot
ElbowPlot(geo2_so, ndims = 50, reduction = 'unintegrated.sct.pca')
## Clustering
# Round 1: manually adjust number of PCs to include in clustering
#pcs = 20 # increase number of PCs
pcs = 10 # reduce number of PCs
# generate nearest neighbor (graph), using selected number of PCs
geo2_so = FindNeighbors(geo2_so, dims = 1:pcs, reduction = 'integrated.sct.rpca')
# Round 2: adjust resolution after testing PCs (remember this only impacts the how the boundaries of the neighbors are drawn, not the underlying NN graph/structure)
res = 0.4
# res = 1.0
# res = 0.2
# generate clusters, using
geo2_so = FindClusters(geo2_so, resolution = res, cluster.name = 'integrated.sct.rpca.clusters')
# look at meta.data to see cluster labels
head(geo2_so@meta.data)
# Prep for UMAP
geo2_so = RunUMAP(geo2_so, dims = 1:pcs, reduction = 'integrated.sct.rpca',
reduction.name = 'umap.integrated.sct.rpca')
geo2_so
# look at clustering results
post_integration_umap_clusters_testing =
DimPlot(geo2_so, group.by = 'seurat_clusters', label = TRUE,
reduction = 'umap.integrated.sct.rpca') + NoLegend()
post_integration_umap_clusters_testing # look at plot
# output to file, including the number of PCs and resolution used to generate results
ggsave(filename = paste0('results/figures/umap_integrated_sct_clusters',
pcs,'PCs',res,'res.png'),
plot = post_integration_umap_plot_clusters,
width = 8, height = 6, units = 'in')
## generate markers and annotate clusters to see how that changes
# prep for cluster comparisons
geo2_so = SetIdent(geo2_so, value = 'integrated.sct.rpca.clusters')
geo2_so = PrepSCTFindMarkers(geo2_so)
# run comparisons for each cluster to generate markers
geo2_markers = FindAllMarkers(geo2_so, only.pos = TRUE)
# run cell type predictions for current clustering results
library(scCATCH)
# create scCATCH object, using count data
geo2_catch = createscCATCH(data = geo2_so@assays$SCT@counts, cluster = as.character(Idents(geo2_so)))
# add marker genes to use for predictions
catch_markers = geo2_markers %>% rename('logfc' = 'avg_log2FC')
geo2_catch@markergene = geo2_markers
# specify tissues/cell-types from the scCATCH reference
geo2_catch@marker = cellmatch[cellmatch$species == 'Mouse' & cellmatch$tissue %in% c('Blood', 'Peripheral Blood', 'Muscle', 'Skeletal muscle', 'Epidermis', 'Skin'), ]
# run scCATCH to generate predictions
geo2_catch = findcelltype(geo2_catch)
# look at the predictions
geo2_catch@celltype %>% select(cluster, cell_type, celltype_score)
## annotate clusters
# Extract the cell types only to merge into the meta.data
catch_celltypes = geo2_catch@celltype %>% select(cluster, cell_type)
# Merge cell types in but as a new table to slide into @meta.data
new_metadata = geo2_so@meta.data %>% left_join(catch_celltypes,
by = c('integrated.sct.rpca.clusters' = 'cluster'))
rownames(new_metadata) = rownames(geo2_so@meta.data) # We are implicitly relying on the same row order!
# Replace the meta.data
geo2_so@meta.data = new_metadata
head(geo2_so@meta.data)
catch_umap_plot = DimPlot(geo2_so, group.by = 'cell_type',
label = TRUE, reduction = 'umap.integrated.sct.rpca')
catch_umap_plot
#########
## Extension - how might you generate DE comparisons between D21 and D7 for all annotated clusters?