3 BM1
3.1 Reading datasets
# Running data with CopyKit for the primary breast sample
<-
bm1_breast runVarbin(
"/mnt/lab/users/dminussi/projects/CopyKit_Manuscript_Code/datasets/BM1/breast/marked_bams/",
remove_Y = TRUE
)
## Counting reads for genome hg38 and resolution: 220kb
## 326 bam files had less than 10 mean bincounts and were removed.
## Performing GC correction.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
## Smoothing bin counts.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
## Running segmentation algorithm: CBS for genome hg38
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
## Merging levels.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
## Done.
# Finding diploid and low-quality cells and excluding it from the copykit object
<- findOutliers(bm1_breast, resolution = 0.8) bm1_breast
## Calculating correlation matrix.
## Marked 288 cells as outliers.
## Adding information to metadata. Access with colData(scCNA).
## Done.
<- findAneuploidCells(bm1_breast) bm1_breast
## number of iterations= 25
## Copykit detected 83 that are possibly diploid cells using a resolution of: 0.052
## Added information to colData(CopyKit).
<- bm1_breast[, colData(bm1_breast)$outlier == FALSE]
bm1_breast <- bm1_breast[, colData(bm1_breast)$is_aneuploid == TRUE]
bm1_breast
# Adding the tissue information to colData
colData(bm1_breast)$timepoint <- 'breast'
# ~~~~~~~~~~~~~~~~~~~~~~~
# Running data with CopyKit for the liver metastasis sample
<-
bm1_liver runVarbin(
"/mnt/lab/users/dminussi/projects/CopyKit_Manuscript_Code/datasets/BM1/liver/marked_bams/",
remove_Y = TRUE
)
## Counting reads for genome hg38 and resolution: 220kb
## 40 bam files had less than 10 mean bincounts and were removed.
## Performing GC correction.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728
## Smoothing bin counts.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728
## Running segmentation algorithm: CBS for genome hg38
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724
## Warning in selectChildren(jobs, timeout): error 'No child processes' in select
## 725726727728
## Merging levels.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728
## Done.
# Finding diploid and low-quality cells and excluding it from the copykit object
<- findOutliers(bm1_liver) bm1_liver
## Calculating correlation matrix.
## Marked 295 cells as outliers.
## Adding information to metadata. Access with colData(scCNA).
## Done.
<- findAneuploidCells(bm1_liver) bm1_liver
## number of iterations= 15
## Copykit detected 284 that are possibly diploid cells using a resolution of: 0.051
## Added information to colData(CopyKit).
<- bm1_liver[, colData(bm1_liver)$outlier == FALSE]
bm1_liver <- bm1_liver[, colData(bm1_liver)$is_aneuploid == TRUE]
bm1_liver
# Adding the tissue information to colData
colData(bm1_liver)$timepoint <- 'liver'
# Running data with CopyKit for the pleural effusion metastasis sample
<-
bm1_pleural runVarbin(
"/mnt/lab/users/dminussi/projects/CopyKit_Manuscript_Code/datasets/BM1/pleural/marked_bams/",
remove_Y = TRUE
)
## Counting reads for genome hg38 and resolution: 220kb
## 33 bam files had less than 10 mean bincounts and were removed.
## Performing GC correction.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
## Smoothing bin counts.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
## Running segmentation algorithm: CBS for genome hg38
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
## Merging levels.
## iteration: 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
## Done.
# Finding diploid and low-quality cells and excluding it from the copykit object
<- findOutliers(bm1_pleural) bm1_pleural
## Calculating correlation matrix.
## Marked 106 cells as outliers.
## Adding information to metadata. Access with colData(scCNA).
## Done.
<- findAneuploidCells(bm1_pleural) bm1_pleural
## number of iterations= 14
## Copykit detected 228 that are possibly diploid cells using a resolution of: 0.048
## Added information to colData(CopyKit).
<- bm1_pleural[, colData(bm1_pleural)$outlier == FALSE]
bm1_pleural <-
bm1_pleural colData(bm1_pleural)$is_aneuploid == TRUE]
bm1_pleural[,
# Adding the tissue information to colData
colData(bm1_pleural)$timepoint <- 'pleural'
Merging the three datasets
# Merging the three copykit objects
<- cbind(bm1_breast,
bm1_merged
bm1_liver, bm1_pleural)
From here on the analysis follow the same steps as a standard CopyKit workflow analysis.
3.2 Running UMAP and Clustering
<- runUmap(bm1_merged) bm1_merged
## Using assay: logr
## Embedding data with UMAP. Using seed 17
## Access reduced dimensions slot with: reducedDim(scCNA, 'umap').
## Done.
<- findSuggestedK(bm1_merged) bm1_merged
## Calculating jaccard similarity for k range: 10 11 12 13 14 15 16 17 18 19 20 21
##
## iteration: 123456789101112
## Suggested k = 12 with median jaccard similarity of: 0.99
<- plotSuggestedK(bm1_merged)
bm1_merged_suggestedk bm1_merged_suggestedk
<- findClusters(bm1_merged) bm1_merged
## Using suggested k_subclones = 12
## Finding clusters, using method: hdbscan
## Found 4 subclones.
## Done.
<- calcConsensus(bm1_merged) bm1_merged
## iteration: 1234
<- runConsensusPhylo(bm1_merged)
bm1_merged
plotHeatmap(bm1_merged, label = c('subclones', 'timepoint'))
## Plotting Heatmap.
## The automatically generated colors map from the minus and plus 99^th of the absolute values in
## the matrix. There are outliers in the matrix whose patterns might be hidden by this color
## mapping. You can manually set the color to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
<- plotUmap(bm1_merged, label = 'subclones') bm1_merged_umap_p
## Plotting Umap.
## Coloring by: subclones.
bm1_merged_umap_p
# Cluster c3 from the pleural sample is a cluster of tumor-normal doublets
# We can subset out of the CopyKit object in a similar way to the filtering
# functions with the information from colData
<- bm1_merged[, colData(bm1_merged)$subclones != 'c3']
bm1_merged
# Re-clustering the sample after doublet removal
# This sample has a smaller sample size, therefore we are reducing the
# n_neighbors parameter from the UMAP and increasing min_dist
<- runUmap(bm1_merged, n_neighbors = 10, min_dist = 0.1) bm1_merged
## Using assay: logr
## Embedding data with UMAP. Using seed 17
## Access reduced dimensions slot with: reducedDim(scCNA, 'umap').
## Done.
# Grid Search of Jaccard Similarity (cluster stability)
<- findSuggestedK(bm1_merged) bm1_merged
## Calculating jaccard similarity for k range: 10 11 12 13 14 15 16 17 18 19 20
##
## iteration: 1234567891011
## Suggested k = 15 with median jaccard similarity of: 0.988
<- findClusters(bm1_merged) bm1_merged
## Using suggested k_subclones = 15
## Finding clusters, using method: hdbscan
## Found 4 subclones.
## Done.
# Plotting the UMAP colored by the tissue of origin from the colData information
<- plotUmap(bm1_merged, label = 'timepoint') bm1_merged_tp_umap_p
## Plotting Umap.
## Coloring by: timepoint.
bm1_merged_tp_umap_p
# Plotting the UMAP colored by the subclones from the colData information
<- plotUmap(bm1_merged, label = 'subclones') bm1_merged_umap_p
## Plotting Umap.
## Coloring by: subclones.
bm1_merged_umap_p
3.3 Consensus tree
To root the tree, we will use an inferred Most Recent Common Ancestral from the primary tumor and provide that as an argument to the runConsensusPhylo function. This consensus tree will be used by plotHeatmap to order the subclones on the plot
# The primary sample per se has very few cells so we will add a subclone
# information to the colData to use the later inferMrca function in CopyKit
colData(bm1_breast)$subclones <- 'c1'
<- calcConsensus(bm1_breast) bm1_breast
## iteration: 1
# Inferring the MRCA on the primary breast sample
<- inferMrca(bm1_breast)
bm1_breast
# calculating the consensus of the Merged dataset and using the inferred
# primary MRCA as the root of the tree
<- calcConsensus(bm1_merged) bm1_merged
## iteration: 1234
<- runConsensusPhylo(bm1_merged,
bm1_merged root = 'user',
root_user = metadata(bm1_breast)$inferred_mrca)
# Rotating branches
consensusPhylo(bm1_merged) <-
::rotate(consensusPhylo(bm1_merged), 6)
ape
<-
bm1_merged_consensus_phylo plotPhylo(bm1_merged, label = 'subclones', consensus = TRUE)
bm1_merged_consensus_phylo
# Calculating cophenetic distances between subclones
::cophenetic.phylo(consensusPhylo(bm1_merged)) ape
## c1 c2 c3 c4
## c1 0.0000 507.340 998.2312 1430.255
## c2 507.3400 0.000 1284.0087 1716.032
## c3 998.2312 1284.009 0.0000 1634.176
## c4 1430.2550 1716.032 1634.1762 0.000
# Plotting the copy number heatmap with annotation data from the subclones and
# the tissue of origin
plotHeatmap(bm1_merged, label = c('subclones', 'timepoint'))
## Plotting Heatmap.
# Plotting a consensus heatmap with the plotHeatmap function.
# The annotation represents the subclones and relevant cancer genes are marked.
plotHeatmap(
bm1_merged,label = 'subclones',
consensus = TRUE,
genes = c(
"MYC",
"MYB",
"BRCA1",
"ERBB2",
"CDH1",
"FGFR1",
"AKT2",
"CDK4",
"CCNE1",
"CCND1",
"MTOR",
"FGF10",
"BRAF",
"AURKA"
) )
## Plotting Heatmap.
## The automatically generated colors map from the minus and plus 99^th of the absolute values in
## the matrix. There are outliers in the matrix whose patterns might be hidden by this color
## mapping. You can manually set the color to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
# Using the plotGeneCopy function to plot the segment ratios mean values for
# the selected genes and coloring it by the tissue of origin from the metadata
<-
bm1_merged_gc plotGeneCopy(
bm1_merged,genes = c(
"MYC",
"MYB",
"BRCA1",
"ERBB2",
"CDH1",
"FGFR1",
"AKT2",
"CDK4",
"CCNE1",
"CCND1",
"MTOR",
"FGF10",
"BRAF",
"AURKA"
),label = 'timepoint',
dodge.width = .8
+
) scale_fill_hue()
## Coloring by: timepoint
## Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing
## scale.
bm1_merged_gc