Finalize prop matrix computation. Rework pheatmap. Enhance transcript plot.... (e4668cbb) · Commits · github_fork / DTUrtle

.Rbuildignore

+3 −0

Original line number	Diff line number	Diff line
		^.*\.Rproj$
		^\.Rproj\.user$
		^_pkgdown\.yml$
		^docs$
		^pkgdown$

.gitignore

0 → 100644

+4 −0

Original line number	Diff line number	Diff line
		.Rproj.user
		.Rhistory
		.RData
		.Ruserdata

DESCRIPTION

+3 −3

Original line number	Diff line number	Diff line
		@@ -2,13 +2,13 @@ Package: DTUrtle
		Type: Package
		Title: Differential Transcript Usage analysis
		Version: 0.1.0
		Author: Tobias Tekath
		Maintainer: Tobias Tekath <tobias.tekath@wwu.de>
		Authors@R: person(given = "Tobias", family = "Tekath", role = c("aut", "cre"), email = "tobias.tekath@gmail.com")
		Description: Easily perform Differential Transcript Usage (DTU) analysis for bulk or single-cell RNAseq data. Needs transcript level counts.
		License: GPL (>= 3)
		Encoding: UTF-8
		LazyData: true
		Depends: R (>= 3.4.0)
		Imports: Matrix, tximport, sparseDRIMSeq, stageR, rtracklayer, formattable, DT, Gviz, assertthat, scales, ggplot2, reshape2, BiocParallel, Matrix.utils, pheatmap
		Imports: Matrix, tximport, sparseDRIMSeq, stageR, rtracklayer, formattable, DT, Gviz, assertthat, scales, ggplot2, reshape2, BiocParallel, Matrix.utils, pheatmap, methods, GenomicRanges
		Suggests: GenomeInfoDb, Seurat (>= 3.0.0)
		RoxygenNote: 7.1.0
		Roxygen: list(markdown = TRUE)

NAMESPACE

+1 −0

Original line number	Diff line number	Diff line
		@@ -12,6 +12,7 @@ export(move_columns_to_front)
		export(one_to_one_mapping)
		export(plot_dtu_table)
		export(plot_proportion_barplot)
		export(plot_proportion_pheatmap)
		export(plot_transcripts_view)
		export(posthoc_and_stager)
		export(rm_version)

R/dtu_analysis.R

+33 −30

Original line number	Diff line number	Diff line
		@@ -18,7 +18,9 @@
		#' - `'stringtie'`
		#' - `'sailfish'`
		#' - `'none'`
		#' @param ... Further parameters to the specific tximport call. See \code{\link[tximport:tximport]{tximport}} for available parameters.
		#'
		#' @inheritDotParams tximport::tximport -files -type
		#'
		#'
		#' @return - For bulk data: A combined count matrix for all specified samples.
		#' - For single-cell data (`type='alevin'`): A list of count matrices per sample. Should be combined and optionally added to a Seurat object with [combine_to_matrix()].
		@@ -38,7 +40,7 @@ import_counts <- function(files, type, ...){
		if(type=="alevin"\|\|type=="bustools"){
		return_obj = list()

		if(hasArg("countsFromAbundance")){
		if(methods::hasArg("countsFromAbundance")){
		warning(paste0("\nImport of ", type," files currently does not support using scaling methods.\nPlease note, that in tagged-end single-cell protocols (like 10X chromium or SureCell) it is assumed\nthat there is no length effect in the fragment generation process - thus making a scaling unnecessary."))
		}

		@@ -55,12 +57,12 @@ import_counts <- function(files, type, ...){
		return(return_obj)

		}else{
		if(hasArg("countsFromAbundance")){
		if(methods::hasArg("countsFromAbundance")){
		if(!args$countsFromAbundance %in% c("dtuScaledTPM", "scaledTPM")){
		warning("It is recommended to use the 'countsFromAbundance' scaling schemes 'dtuScaledTPM' or 'scaledTPM',\nto correct for increased counts of longer transcripts.\nIf you are using a tagged-end protocol, the use 'no' is suggested.")
		}
		}else{
		if(hasArg("tx2gene")){
		if(methods::hasArg("tx2gene")){
		message("Using 'dtuScaledTPM' for 'countsFromAbundance'.")
		args$countsFromAbundance <- "dtuScaledTPM"
		}else{
		@@ -68,7 +70,7 @@ import_counts <- function(files, type, ...){
		args$countsFromAbundance <- "scaledTPM"
		}
		}
		if(hasArg(txOut)){
		if(methods::hasArg("txOut")){
		if(args$txOut==F){
		warning("Unless you exactly know what you are doing it is not recommended to set txOut to FALSE.\nDownstream analysis may fail!")
		}
		@@ -105,8 +107,8 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, seurat_obj=NULL, tx

		if(!is.null(seurat_obj)){
		assertthat::assert_that(require("Seurat", character.only = T), msg = "The package Seurat is needed for adding the combined matrix to a seurat object.")
		assertthat::assert_that(packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(is(seurat_obj, "Seurat"), msg = "The provided 'seurat_obj' is not of class Seurat.")
		assertthat::assert_that(utils::packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(methods::is(seurat_obj, "Seurat"), msg = "The provided 'seurat_obj' is not of class Seurat.")
		assertthat::assert_that(seurat_obj@version>="3.0.0", msg = "The provided 'seurat_obj' is not a Seurat 3 object. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(assertthat::is.string(assay_name))
		}
		@@ -115,7 +117,7 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, seurat_obj=NULL, tx
		tx_list <- list(tx_list)
		}

		if(!all(as.logical(lapply(tx_list, FUN = function(x) is(x, 'sparseMatrix'))))){
		if(!all(as.logical(lapply(tx_list, FUN = function(x) methods::is(x, 'sparseMatrix'))))){
		stop("Your 'tx_list' object contains non sparseMatrix elements.")
		}

		@@ -130,7 +132,7 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, seurat_obj=NULL, tx
		dup <- any(duplicated(unlist(lapply(tx_list, FUN = colnames))))

		if(!is.null(seurat_obj)){
		extensions_in_seur <- sum(grepl(pattern = "_", x = Cells(seurat_obj), fixed = T))>length(Cells(seurat_obj))*0.1
		extensions_in_seur <- sum(grepl(pattern = "_", x = Seurat::Cells(seurat_obj), fixed = T))>length(Seurat::Cells(seurat_obj))*0.1
		if(!extensions_in_seur&dup){
		stop("Found no Seurat cellname extension but not unique cellnames. Make your cellnames unique or extend the cellnames in your Seurat object.")
		}
		@@ -139,7 +141,7 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, seurat_obj=NULL, tx
		if(!is.null(cell_extensions)){
		warning("Discarding provided cellname extensions and using Seurat object ones.")
		}
		cell_extensions <- paste0("_", unique(stringi::stri_split_fixed(str= Cells(seurat_obj), pattern = "_", n = 2, simplify = T)[,2]))
		cell_extensions <- paste0("_", unique(stringi::stri_split_fixed(str= Seurat::Cells(seurat_obj), pattern = "_", n = 2, simplify = T)[,2]))
		if(length(cell_extensions)!=length(names(tx_list))){
		stop("Could not 1:1 map seurat cellname extensions and tx file list.\n",
		"Try subsetting the seurat object if you do not want to provide tx information for all samples.")
		@@ -179,9 +181,9 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, seurat_obj=NULL, tx
		tx_list <- merge_sparse(tx_list)

		if(!is.null(seurat_obj)){
		common_cells <- intersect(Cells(seurat_obj), colnames(tx_list))
		uniq_cells <- setdiff(colnames(tx_list), Cells(seurat_obj))
		extra_seurat_cells <- setdiff(Cells(seurat_obj), colnames(tx_list))
		common_cells <- intersect(Seurat::Cells(seurat_obj), colnames(tx_list))
		uniq_cells <- setdiff(colnames(tx_list), Seurat::Cells(seurat_obj))
		extra_seurat_cells <- setdiff(Seurat::Cells(seurat_obj), colnames(tx_list))
		message("Of ", ncol(tx_list), " cells, ", length(common_cells), " (", round(length(common_cells)/ncol(tx_list)*100), "%) could be found in the provided seurat object.")
		message(length(uniq_cells), " (", round(length(uniq_cells)/ncol(tx_list)*100),"%) are unique to the transcriptomic files.")
		message("The seurat object contains ", length(extra_seurat_cells), " additional cells.")
		@@ -200,7 +202,7 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, seurat_obj=NULL, tx

		if(!is.null(seurat_obj)){
		message("Adding assay '",assay_name,"'")
		seurat_obj[[assay_name]] <- CreateAssayObject(counts=tx_list)
		seurat_obj[[assay_name]] <- Seurat::CreateAssayObject(counts=tx_list)
		seurat_obj@active.assay <- assay_name

		if(!is.null(tx2gene)){
		@@ -236,7 +238,7 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, seurat_obj=NULL, tx
		#' @param BPPARAM If multicore processing should be used, specify a `BiocParallelParam` object here. Among others, can be `SerialParam()` (default) for non-multicore processing or `MulticoreParam('number_cores')` for multicore processing. See \code{\link[BiocParallel:BiocParallel-package]{BiocParallel}} for more information.
		#' @param force_dense If you do not want to use a sparse Matrix for DRIMSeq calculations, you can force a dense conversion by specifying `TRUE`. Increases memory usage, but also reduces runtime drastically (currently).
		#' @param carry_over_metadata Specify if compatible additional columns of `tx2gene` shall be carried over to the gene and transcript level `meta_table` in the results. Columns with `NA` values are not carried over.
		#' @param ... If `filtering_strategy='own'` specify the wished parameters of `dmFilter()` here.
		#' @param ... If `filtering_strategy='own'` specify the wished parameters of \code{\link[sparseDRIMSeq:dmFilter]{dmFilter}} here.
		#'
		#' @return `dturtle` object with the key results, that can be used in the DTUrtle steps hereafter. The object is just a easily accesible list with the following items:
		#' - `meta_table_gene`: Data frame of the expressed-in ratio of all genes. Expressed-in is defined as expression > 0. Can be used to add gene level meta-information for plotting.
		@@ -252,9 +254,9 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, seurat_obj=NULL, tx
		#'
		#' @examples
		run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=NULL, filtering_strategy="bulk", BPPARAM=BiocParallel::SerialParam(), force_dense=T, carry_over_metadata=T, ...){
		if(is(counts, "Seurat")){
		if(methods::is(counts, "Seurat")){
		assertthat::assert_that(require("Seurat", character.only = T), msg = "The package Seurat is needed for adding the combined matrix to a seurat object.")
		assertthat::assert_that(packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(utils::packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(counts@version>="3.0.0", msg = "The provided 'counts' is not a Seurat 3 object. Currently only Seurat 3 objects are supported.")
		if(is.vector(tx2gene)){
		meta_df <- counts[[counts@active.assay]]@meta.features
		@@ -262,15 +264,15 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		assertthat::assert_that(all(tx2gene %in% colnames(meta_df)), msg = "Not all provided 'tx2gene' colnames are present in the feature-level meta data.")
		tx2gene <- counts[[counts@active.assay]]@meta.features[,tx2gene]
		}
		counts <- GetAssayData(counts)
		counts <- Seurat::GetAssayData(counts)
		}
		assertthat::assert_that(is(counts, "matrix")\|is(counts, "sparseMatrix"))
		assertthat::assert_that(is(tx2gene, "data.frame"))
		assertthat::assert_that(is(pd, "data.frame"))
		assertthat::assert_that(methods::is(counts, "matrix")\|methods::is(counts, "sparseMatrix"))
		assertthat::assert_that(methods::is(tx2gene, "data.frame"))
		assertthat::assert_that(methods::is(pd, "data.frame"))
		assertthat::assert_that(ncol(tx2gene)>1)
		assertthat::assert_that(cond_col %in% colnames(pd), msg = paste0("Could not find", cond_col, " in column names of pd."))
		assertthat::assert_that(filtering_strategy %in% c("bulk", "sc", "own"), msg = "Please select a valid filtering strategy ('bulk', 'sc' or 'own').")
		assertthat::assert_that(is(BPPARAM, "BiocParallelParam"), msg = "Please provide a valid BiocParallelParam object.")
		assertthat::assert_that(methods::is(BPPARAM, "BiocParallelParam"), msg = "Please provide a valid BiocParallelParam object.")
		assertthat::assert_that(is.logical(force_dense))
		assertthat::assert_that(is.logical(carry_over_metadata))
		assertthat::assert_that(all(rownames(counts) %in% tx2gene[[1]]), msg = "Not all rownames of the counts are present in the first tx2gene column. You may want to reorder the tx2gene columns with 'move_columns_to_front()'.")
		@@ -301,7 +303,7 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		samp <- samp[!is.na(samp$condition),]
		counts <- counts[ , !(colnames(counts) %in% exclude)]
		}
		message("\nProceed with cells/samples: ",paste0(capture.output(table(samp$condition)), collapse = "\n"))
		message("\nProceed with cells/samples: ",paste0(utils::capture.output(table(samp$condition)), collapse = "\n"))

		#TODO: Reevaluate!
		message("\nFiltering...\n")
		@@ -314,12 +316,12 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		switch(filtering_strategy,
		sc={

		filter_opt_list <- modifyList(filter_opt_list, list("min_samps_feature_prop" = smallest_group,
		filter_opt_list <- utils::modifyList(filter_opt_list, list("min_samps_feature_prop" = smallest_group,
		"min_feature_prop" = 0.1, "run_gene_twice" = T))
		},
		bulk={

		filter_opt_list <- modifyList(filter_opt_list,
		filter_opt_list <- utils::modifyList(filter_opt_list,
		list("min_samps_feature_expr" = smallest_group,
		"min_feature_expr" = 1,
		"min_samps_gene_expr" = total_sample,
		@@ -329,12 +331,12 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=

		},
		own={
		filter_opt_list <- modifyList(filter_opt_list, list(...))
		filter_opt_list <- utils::modifyList(filter_opt_list, list(...))
		})
		counts <- do.call(sparse_filter, args = c(list("counts" = counts, "tx2gene" = tx2gene, "BPPARAM" = BPPARAM), filter_opt_list), quote=TRUE)
		tx2gene <- tx2gene[match(rownames(counts), tx2gene$feature_id),]

		if(is(counts, 'sparseMatrix')&force_dense){
		if(methods::is(counts, 'sparseMatrix')&force_dense){
		counts <- tryCatch(
		{
		as.matrix(counts)
		@@ -363,9 +365,10 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		}
		}
		}
		design_full <- model.matrix(~condition, data=samp)
		design_full <- stats::model.matrix(~condition, data=samp)

		message("\nPerforming statistical tests...\n")
		BiocParallel::bpprogressbar(BPPARAM) <- T
		drim_test <- sparseDRIMSeq::dmPrecision(drim, design=design_full, prec_subset=1, BPPARAM=BPPARAM, add_uniform=T)
		drim_test <- sparseDRIMSeq::dmFit(drim_test, design=design_full, BPPARAM=BPPARAM, add_uniform=T)
		drim_test <- sparseDRIMSeq::dmTest(drim_test, coef=colnames(design_full)[2], BPPARAM=BPPARAM)
		@@ -377,7 +380,7 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=

		return_obj <- list("meta_table_gene"=exp_in_gn, "meta_table_tx"=exp_in_tx, "meta_table_sample"=samp,
		"drim"=drim_test, "design_full"=design_full, "group"=group,
		"used_filtering_options"=list("DRIM"=filter_opt_list), "tictoc"=tictoc::tic.log(format = T))
		"used_filtering_options"=list("DRIM"=filter_opt_list))
		class(return_obj) <- append("dturtle", class(return_obj))
		return(return_obj)
		}
		@@ -436,7 +439,7 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){

		if(length(sig_gene)>0){
		temp <- fdr_table[fdr_table$gene<ofdr&fdr_table$transcript<ofdr,,drop=F]
		sig_tx <- setNames(temp$txID, temp$geneID)
		sig_tx <- stats::setNames(temp$txID, temp$geneID)
		message("Found ",length(sig_gene)," significant genes with ",length(sig_tx)," significant transcripts (OFDR: ",ofdr,")")
		}else{
		sig_gene <- NULL

Admin message