Updated posthoc and dtu table creation. (510a37ab) · Commits · github_fork / DTUrtle

NAMESPACE

+1 −0

Original line number	Diff line number	Diff line
		@@ -7,6 +7,7 @@ export(get_by_partition)
		export(import_counts)
		export(import_gtf)
		export(move_columns_to_front)
		export(plot_dtu_table)
		export(posthoc_and_stager)
		export(rm_version)
		export(run_drimseq)

R/dtu_analysis.R

+29 −25

Original line number	Diff line number	Diff line
		@@ -83,7 +83,7 @@ import_counts <- function(files, type, ...){
		#' @param tx_list List of sparse transcription count matrices, as returned by `import_counts()` for single-cell data.
		#' @param cell_extensions Optional list of cellname extensions that are added to the cellnames of one sample. The cellnames and the extension are separated by an underscore '_'.
		#' @param seurat_obj Optional seurat object, where the combined matrix is added as an assay. This has the advantage, that the cells are matched and subsetted if necessary. Currently only Seurat 3 objects are supported.
		#' @param tx2gene Optional tx2gene/metadata data frame, which is added as feature-level meta data to the created assay. The first column of the data frame must contain transcript names/ids. The same transcript names/ids as in the `tx_list` objects must be used.
		#' @param tx2gene Optional tx2gene/metadata data frame, can only be used in conjunction with a seurat object. Metadata is added as feature-level meta data to the created assay. The first column of the data frame must contain transcript names/ids. The same transcript names/ids as in the `tx_list` objects must be used.
		#' @param assay_name If the combined matrix should be added to an existing Seurat object, the name of the assay can be specified here.
		#'
		#' @return Either a combined sparse transcription count matrix or a seurat object which the combined sparse transcription count matrix as an assay.
		@@ -247,21 +247,25 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		assertthat::assert_that(packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(counts@version>="3.0.0", msg = "The provided 'counts' is not a Seurat 3 object. Currently only Seurat 3 objects are supported.")
		if(is.vector(tx2gene)){
		if(ncol(counts[[counts@active.assay]]@meta.features)>1){
		meta_df <- counts[[counts@active.assay]]@meta.features
		assertthat::assert_that(ncol(meta_df)>1, msg="No feature-level meta data in active assay of seurat object. Was 'tx2gene' provided in 'combine_to_matrix()'?\nAlternatively provide a real tx2gene dataframe.")
		assertthat::assert_that(all(tx2gene %in% colnames(meta_df)), msg = "Not all provided 'tx2gene' colnames are present in the feature-level meta data.")
		tx2gene <- counts[[counts@active.assay]]@meta.features[,tx2gene]
		}else{
		stop("No feature-level meta data in seurat object. Was 'tx2gene' provided in 'combine_to_matrix()'?\nAlternatively provide a real tx2gene dataframe.")
		}
		}
		counts <- GetAssayData(counts)
		}
		assertthat::assert_that(is(counts, "matrix")\|is(counts, "sparseMatrix"))
		assertthat::assert_that(is(tx2gene, "data.frame"))
		assertthat::assert_that(is(pd, "data.frame"))
		assertthat::assert_that(ncol(tx2gene)>1)
		assertthat::assert_that(cond_col %in% colnames(pd), msg = paste0("Could not find", cond_col, " in colnames of pd."))
		assertthat::assert_that(filtering_strategy %in% c("bulk", "sc", "own"), msg = "Please select a valid filtering strategy ('bulk', 'sc' or 'own').")
		assertthat::assert_that(is(BPPARAM, "BiocParallelParam"), msg = "Please provide a valid BiocParallelParam object.")
		assertthat::assert_that(is.logical(force_dense))
		assertthat::assert_that(is.logical(carry_over_metadata))
		assertthat::assert_that(all(rownames(counts) %in% tx2gene[[1]]), msg = "Not all rownames of the counts are present in the first tx2gene column. You may want to reorder the tx2gene columns with 'move_columns_to_front()'.")

		tx2gene <- rapply(tx2gene, as.character, classes="factor", how="replace")
		tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
		assertthat::assert_that(nrow(tx2gene)==nrow(counts))
		colnames(tx2gene)[c(1,2)] <- c("feature_id", "gene_id")
		@@ -320,8 +324,9 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		own={
		filter_opt_list <- modifyList(filter_opt_list, list(...))
		})
		counts <- do.call(sparse_filter, args = c(list(counts = counts, tx2gene = tx2gene, BPPARAM = BPPARAM),
		filter_opt_list))
		tictoc::tic("Filter")
		counts <- do.call(sparse_filter, args = c(list("counts" = counts, "tx2gene" = tx2gene, "BPPARAM" = BPPARAM), filter_opt_list), quote=TRUE)
		tictoc::toc(log = T)
		tx2gene <- tx2gene[match(rownames(counts), tx2gene$feature_id),]

		if(is(counts, 'sparseMatrix')&force_dense){
		@@ -336,15 +341,13 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		format(structure(as.double(nrow(counts))as.double(ncol(counts))8, class="object_size"), units="auto"), " of memory.")
		})
		}
		# counts <- data.frame(tx2gene, counts, row.names = NULL, stringsAsFactors = F)
		#
		# drim <- DRIMSeq::dmDSdata(counts = counts, samples = samp)

		drim <- sparseDRIMSeq::sparse_dmDSdata(tx2gene = tx2gene, counts = counts, samples = samp)

		exp_in_tx <- ratio_expression_in(drim, "tx")
		exp_in_gn <- ratio_expression_in(drim, "gene")

		tictoc::tic("Ratios")
		exp_in_tx <- ratio_expression_in(drim, "tx", BPPARAM=BPPARAM)
		exp_in_gn <- ratio_expression_in(drim, "gene", BPPARAM=BPPARAM)
		tictoc::toc(log = T)
		tictoc::tic("Metadata")
		#carry over metadata
		if(carry_over_metadata&ncol(tx2gene)>2){
		temp <- tx2gene[match(rownames(exp_in_tx), tx2gene$feature_id),]
		@@ -353,13 +356,12 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		exp_in_tx <- cbind(exp_in_tx, temp[,-c(1,2)])
		add_to_gene_columns <- check_unique_by_partition(temp[,-c(1,2)], drim@counts@partitioning)
		if(!is.null(add_to_gene_columns)){
		temp <- get_by_partition(df = temp, columns =add_to_gene_columns, partitioning = drim@counts@partitioning, FUN=unique)
		temp <- get_by_partition(df = temp, columns =add_to_gene_columns, partitioning = drim@counts@partitioning, FUN=unique, BPPARAM = BPPARAM)
		exp_in_gn <- cbind(exp_in_gn, temp[match(rownames(exp_in_gn), temp[[1]]),-c(1)])
		}
		}
		}


		tictoc::toc(log = T)
		design_full <- model.matrix(~condition, data=samp)

		tictoc::toc(log = T)
		@@ -385,6 +387,7 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=


		#TODO: implement Noise - IQR filtering
		#TODO: Posthoc!

		#' Posthoc filtering and two-staged statistical tests
		#'
		@@ -399,13 +402,13 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		#' @param ofdr Overall false discovery rate (OFDR) threshold.
		#' @param posthoc_filt Specify the minimal standard deviation of a transcripts porportion level that should be kept when performing posthoc filtering. To disbale poshoc filtering 0 or `FALSE` can be provided.
		#' @return An extended `dturtle` object. Additional slots include:
		#' - `DE_gene`:
		#' - `DE_tx` :
		#' - `FDR_table` :
		#' - `DE_gene`: A character vector of all genes where the first stageR step was significant. Basically the significant genes, that showed signs of DTU.
		#' - `DE_tx` : A named character vector of transcripts where the second stageR step was significant. Basically the significant transcripts of the significant genes.
		#' - `FDR_table` : A data frame of the stage-wise adjusted p-values for all genes/transcripts. Might contain NA-values, as transcript level p-values are not avaible when the gene level test was not significant.
		#'
		#' @family DTUrtle
		#' @export
		#' @seealso [run_drimseq()] for DTU object creation. [create_dtu_table()] for result visualization.
		#' @seealso [run_drimseq()] for DTU object creation. [create_dtu_table()] for result table creation.
		#'
		#' @examples
		posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
		@@ -417,8 +420,8 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
		}
		assertthat::assert_that(0<=ofdr & ofdr<=1, msg = "The provided 'ofdr' parameter is invalid. Must be a number between [0,1].")

		res <- DRIMSeq::results(dturtle$drim)
		res_txp <- DRIMSeq::results(dturtle$drim, level="feature")
		res <- sparseDRIMSeq::results(dturtle$drim)
		res_txp <- sparseDRIMSeq::results(dturtle$drim, level="feature")

		if(posthoc!=F\|posthoc>0){
		res_txp <- run_posthoc(dturtle$drim, posthoc)
		@@ -438,7 +441,8 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
		DE_gene <- unique(as.character(fdr_table$geneID[fdr_table$gene<ofdr]))

		if(length(DE_gene)>0){
		DE_tx <- fdr_table$txID[fdr_table$gene<ofdr&fdr_table$transcript<ofdr]
		temp <- fdr_table[fdr_table$gene<ofdr&fdr_table$transcript<ofdr,]
		DE_tx <- setNames(as.character(temp$txID), temp$geneID)
		message("Found ",length(DE_gene)," significant genes with ",length(DE_tx)," significant transcripts (OFDR: ",ofdr,")")
		}else{
		DE_gene <- NULL

R/sparse_support.R

+3 −228

Original line number	Diff line number	Diff line
		@@ -6,20 +6,20 @@
		#'
		#' @param counts Sparse count matrix.
		#' @param tx2gene Feature to gene mapping.
		#' @inheritParams DRIMSeq::dmFilter
		#' @param BPPARAM If multicore processing should be used, specify a `BiocParallelParam` object here. Among others, can be `SerialParam()` (default) for standard non-multicore processing or `MulticoreParam('number_cores')` for multicore processing. See \code{\link[BiocParallel:BiocParallel-package]{BiocParallel}} for more information.
		#' @inheritParams sparseDRIMSeq::dmFilter
		#'
		#' @return Filtered sparse matrix
		sparse_filter <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(), min_samps_gene_expr = 0,
		min_gene_expr = 0, min_samps_feature_expr = 0, min_feature_expr = 0,
		min_samps_feature_prop = 0, min_feature_prop = 0,
		run_gene_twice=FALSE){

		assertthat::assert_that(is(counts, "matrix")\|is(counts, "sparseMatrix"))
		counts <- counts[Matrix::rowSums(counts)>0,]
		tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
		#genes with at least two transcripts
		inds <- which(duplicated(tx2gene[[2]]) \| duplicated(tx2gene[[2]], fromLast = TRUE))
		inds <- split(inds, f = tx2gene[inds,2])

		filter <- function(row_index){
		expr_features <- counts[row_index,]

		@@ -71,231 +71,6 @@ sparse_filter <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(),

		counts_new <- BiocParallel::bplapply(inds, FUN=filter, BPPARAM=BPPARAM)
		counts_new <- counts[unlist(counts_new),]

		assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
		message("Retain ",nrow(counts_new), " of ",nrow(counts)," features.\nRemoved ", nrow(counts)-nrow(counts_new), " features.")
		return(counts_new)
		}




		sparse_filter_naive <- function(counts, tx2gene, min_samps_gene_expr = 0,
		min_gene_expr = 0, min_samps_feature_expr = 0, min_feature_expr = 0,
		min_samps_feature_prop = 0, min_feature_prop = 0,
		run_gene_twice=FALSE){
		counts <- counts[Matrix::rowSums(counts)>0,]
		tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
		#genes with at least two transcripts
		inds <- unique(tx2gene[[2]][duplicated(tx2gene[[2]])])
		inds <- lapply(inds, FUN=function(x) counts[tx2gene[[1]][tx2gene[[2]]==x],])

		counts_new <- NULL
		for(expr_features in inds){
		### no genes with no expression
		if(sum(expr_features, na.rm = TRUE) == 0)
		next()

		### genes with min expression
		if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
		min_samps_gene_expr )
		next()

		### no features with no expression
		features2keep <- Matrix::rowSums(expr_features > 0, na.rm = TRUE) > 0

		### no genes with one feature
		if(sum(features2keep) <= 1)
		next()

		expr_features <- expr_features[features2keep, , drop = FALSE]

		### features with min expression
		features2keep <- Matrix::rowSums(expr_features >= min_feature_expr, na.rm = TRUE) >=
		min_samps_feature_expr

		### no genes with one feature
		if(sum(features2keep) <= 1)
		next()

		expr_features <- expr_features[features2keep, , drop = FALSE]

		### genes with zero expression
		samps2keep <- Matrix::colSums(expr_features) > 0 & !is.na(expr_features[1, ])

		if(sum(samps2keep) < max(1, min_samps_feature_prop))
		next()

		temp <- expr_features[, samps2keep, drop = FALSE]
		prop <- sweep(temp, 2, Matrix::colSums(temp), "/")

		### features with min proportion
		features2keep <- Matrix::rowSums(prop >= min_feature_prop) >= min_samps_feature_prop

		### no genes with one feature
		if(sum(features2keep) <= 1)
		next()

		expr <- expr_features[features2keep, , drop = FALSE]

		if (run_gene_twice) {
		### no genes with no expression
		if(sum(expr_features, na.rm = TRUE) == 0)
		next()

		### genes with min expression
		if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
		min_samps_gene_expr )
		next()
		}
		counts_new <- rbind(counts_new, expr)
		}
		assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
		message("Retain ",nrow(counts_new), " of ",nrow(counts)," features.\nRemoved ", nrow(counts)-nrow(counts_new), " features.")
		return(counts_new)
		}

		sparse_filter_naive_parallel <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(), min_samps_gene_expr = 0,
		min_gene_expr = 0, min_samps_feature_expr = 0, min_feature_expr = 0,
		min_samps_feature_prop = 0, min_feature_prop = 0,
		run_gene_twice=FALSE){


		tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
		#genes with at least two transcripts
		inds <- unique(tx2gene[[2]][duplicated(tx2gene[[2]])])
		inds <- lapply(inds, FUN=function(x) counts[tx2gene[[1]][tx2gene[[2]]==x],])

		filter <- function(expr_features){
		### no genes with no expression
		if(sum(expr_features, na.rm = TRUE) == 0)
		return(NULL)

		### genes with min expression
		if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
		min_samps_gene_expr )
		return(NULL)

		### no features with no expression
		features2keep <- Matrix::rowSums(expr_features > 0, na.rm = TRUE) > 0

		### no genes with one feature
		if(sum(features2keep) <= 1)
		return(NULL)

		expr_features <- expr_features[features2keep, , drop = FALSE]

		### features with min expression
		features2keep <- Matrix::rowSums(expr_features >= min_feature_expr, na.rm = TRUE) >=
		min_samps_feature_expr

		### no genes with one feature
		if(sum(features2keep) <= 1)
		return(NULL)

		expr_features <- expr_features[features2keep, , drop = FALSE]

		### genes with zero expression
		samps2keep <- Matrix::colSums(expr_features) > 0 & !is.na(expr_features[1, ])

		if(sum(samps2keep) < max(1, min_samps_feature_prop))
		return(NULL)

		temp <- expr_features[, samps2keep, drop = FALSE]
		prop <- sweep(temp, 2, Matrix::colSums(temp), "/")

		### features with min proportion
		features2keep <- Matrix::rowSums(prop >= min_feature_prop) >= min_samps_feature_prop

		### no genes with one feature
		if(sum(features2keep) <= 1)
		return(NULL)

		expr <- expr_features[features2keep, , drop = FALSE]

		if (run_gene_twice) {
		### no genes with no expression
		if(sum(expr_features, na.rm = TRUE) == 0)
		return(NULL)

		### genes with min expression
		if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
		min_samps_gene_expr )
		return(NULL)
		}
		return(expr)
		}

		counts_new <- BiocParallel::bplapply(inds, FUN=filter, BPPARAM=BPPARAM)
		counts_new <- do.call(rbind, counts_new)

		assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
		return(counts_new)
		}

		sparse_filter_index <- function(counts, tx2gene, min_samps_gene_expr = 0,
		min_gene_expr = 0, min_samps_feature_expr = 0, min_feature_expr = 0,
		min_samps_feature_prop = 0, min_feature_prop = 0,
		run_gene_twice=FALSE){

		counts <- counts[Matrix::rowSums(counts)>0,]
		tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
		#genes with at least two transcripts
		inds <- which(duplicated(tx2gene[[2]]) \| duplicated(tx2gene[[2]], fromLast = TRUE))
		inds <- split(inds, f = tx2gene[inds,2])

		filter <- function(row_index){
		expr_features <- counts[row_index,]

		### genes with min expression
		if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
		min_samps_gene_expr )
		return(NULL)

		### features with min expression
		row_index <- Matrix::rowSums(expr_features >= min_feature_expr, na.rm = TRUE) >=
		min_samps_feature_expr

		### no genes with one feature
		if(sum(row_index) <= 1)
		return(NULL)

		expr_features <- expr_features[row_index, , drop = FALSE]

		### genes with zero expression
		samps2keep <- Matrix::colSums(expr_features) > 0 & !is.na(expr_features[1, ])

		if(sum(samps2keep) < max(1, min_samps_feature_prop))
		return(NULL)

		temp <- expr_features[, samps2keep, drop = FALSE]
		prop <- sweep(temp, 2, Matrix::colSums(temp), "/")

		### features with min proportion
		row_index <- Matrix::rowSums(prop >= min_feature_prop) >= min_samps_feature_prop

		### no genes with one feature
		if(sum(row_index) <= 1)
		return(NULL)

		expr <- expr_features[row_index, , drop = FALSE]

		if (run_gene_twice) {
		### no genes with no expression
		if(sum(expr_features, na.rm = TRUE) == 0)
		return(NULL)

		### genes with min expression
		if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
		min_samps_gene_expr )
		return(NULL)
		}
		return(rownames(expr))
		}

		counts_new <- lapply(inds, FUN=filter)
		counts_new <- counts[unlist(counts_new),]

		assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
		message("Retain ",nrow(counts_new), " of ",nrow(counts)," features.\nRemoved ", nrow(counts)-nrow(counts_new), " features.")
		return(counts_new)

R/utility_functions.R

+23 −25

Original line number	Diff line number	Diff line
		@@ -17,7 +17,7 @@ no_na <- function(x){
		#' @return
		smallProportionSD <- function(d, filter) {
		browser()
		d <- DRIMSeq::counts(d)
		d <- sparseDRIMSeq::counts(d)
		cts <- as.matrix(d[,!colnames(d) %in% c("gene_id", "feature_id")])
		gene.cts <- rowsum(cts, d$gene_id)
		total.cts <- gene.cts[match(d$gene_id, rownames(gene.cts)),]
		@@ -87,9 +87,9 @@ seurat_add_tx2gene <- function(seurat_obj, tx2gene){
		#' @param drim
		#' @param filt Threshold
		#'
		#' @return A filtered `DRIMSeq::results()` data frame.
		#' @return A filtered `sparseDRIMSeq::results()` data frame.
		run_posthoc <- function(drim, filt){
		res_txp_filt <- DRIMSeq::results(drim, level="feature")
		res_txp_filt <- sparseDRIMSeq::results(drim, level="feature")
		filt <- smallProportionSD(drim, filt)
		res_txp_filt$pvalue[filt] <- 1
		res_txp_filt$adj_pvalue[filt] <- 1
		@@ -112,23 +112,19 @@ get_diff <- function(gID, dturtle){
		return(y)
		}

		#' Add a column specifying the maximal difference between the two comparison groups

		#' Return the maximal absolute difference for all transcripts of the procvided gene
		#'
		#' @param dtu_table The dtu data frame where the column shall be added.
		#' @param dturtle The corresponding dturtle object the `dtu_table` originates from.
		#' @param gID gene-identifier
		#' @param dturtle dturtle object
		#'
		#' @return A dtu data frame with the added column.
		add_max_delta <- function(dtu_table, dturtle){
		getmax <- function(gID){
		#' @return The maximal absolute difference value
		getmax <- function(gID, dturtle){
		y <- get_diff(gID, dturtle)
		#get absoulte maximum while preserving sign
		return(y$diff[which.max(abs(y$diff))])
		}

		dtu_table[[paste0("max(",levels(dturtle$group)[1], "-",levels(dturtle$group)[2],")")]] <- as.numeric(sapply(dtu_table$geneID, FUN = getmax))
		return(dtu_table)
		}


		#' Parse a gtf file and return a transcript level dataframe.
		#'
		@@ -192,11 +188,11 @@ rm_version <- function(x){
		#' @param type Type of the summarization that shall be performed. Options are:
		#' - `'tx'`: Transcript-level expressed-in ratios.
		#' - `'gene'`: Gene-level expressed-in ratios.
		#'
		#' @param BPPARAM If multicore processing should be used, specify a `BiocParallelParam` object here. Among others, can be `SerialParam()` (default) for standard non-multicore processing or `MulticoreParam('number_cores')` for multicore processing. See \code{\link[BiocParallel:BiocParallel-package]{BiocParallel}} for more information.
		#' @return Data frame with the expressed-in ratios.
		#'
		#' @examples
		ratio_expression_in <- function(drim, type){
		ratio_expression_in <- function(drim, type, BPPARAM=BiocParallel::SerialParam()){
		assertthat::assert_that(type %in% c("tx", "gene"))
		part <- drim@counts@partitioning
		data <- drim@counts@unlistData
		@@ -209,19 +205,19 @@ ratio_expression_in <- function(drim, type){
		ret <- data.frame(rep(names(part), lengths(part)),
		rownames(data),
		Matrix::rowSums(data!=0)/ncol(data),
		sapply(cond, function(x){
		BiocParallel::bplapply(cond, FUN = function(x){
		group_data = data[,drim@samples$sample_id[drim@samples$condition==x],drop=F]
		return(Matrix::rowSums(group_data!=0)/ncol(group_data))
		}), stringsAsFactors = F)
		}, BPPARAM = BPPARAM), stringsAsFactors = F)
		colnames(ret) <- c("gene","tx","exp_in",paste0("exp_in_",cond))
		}else{
		data <- t(sapply(part, function(x) Matrix::colSums(data[x,,drop=F])))
		data <- t(sapply(part, FUN = function(x) Matrix::colSums(data[x,,drop=F])))
		ret <- data.frame(rownames(data),
		Matrix::rowSums(data!=0)/ncol(data),
		sapply(cond, function(x){
		BiocParallel::bplapply(cond, FUN = function(x){
		group_data = data[,drim@samples$sample_id[drim@samples$condition==x],drop=F]
		return(Matrix::rowSums(group_data!=0)/ncol(group_data))
		}), stringsAsFactors = F)
		}, BPPARAM = BPPARAM), stringsAsFactors = F)
		colnames(ret) <- c("gene","exp_in",paste0("exp_in_",cond))
		}
		return(ret)
		@@ -271,17 +267,19 @@ check_unique_by_partition <- function(df, partitioning, columns=NULL){
		#' @param FUN Aggregation function. Can be a base function like `unique`, `length`, etc., or a custom function.
		#' @param columns Optional: Only aggregate the specified columns of `df`. Defaults to all columns.
		#' @inheritParams stats::aggregate.data.frame
		#' @param BPPARAM If multicore processing should be used, specify a `BiocParallelParam` object here. Among others, can be `SerialParam()` (default) for standard non-multicore processing or `MulticoreParam('number_cores')` for multicore processing. See \code{\link[BiocParallel:BiocParallel-package]{BiocParallel}} for more information.
		#'
		#' @return Data frame with Group column that specifies the partition and one column per specified column with aggregated values.
		#' @export
		get_by_partition <- function(df, partitioning, FUN, columns=NULL, simplify=T, drop=T){
		get_by_partition <- function(df, partitioning, FUN, columns=NULL, simplify=T, drop=T, BPPARAM=BiocParallel::SerialParam()){
		assertthat::assert_that(is.data.frame(df))
		assertthat::assert_that(is.list(partitioning))
		assertthat::assert_that(is.function(FUN))
		assertthat::assert_that(is(BPPARAM, "BiocParallelParam"), msg = "Please provide a valid BiocParallelParam object.")
		if(!is.null(columns)){
		assertthat::assert_that(all(columns %in% colnames(df)))
		df <- df[,columns]
		}
		return(stats::aggregate(df, by=list(rep(names(partitioning), lengths(partitioning))), FUN=FUN, simplify=simplify, drop=T))
		return(BiocParallel::bpaggregate(df, by=list(rep(names(partitioning), lengths(partitioning))), FUN=FUN, simplify=simplify, drop=T, BPPARAM = BPPARAM))
		}

R/visualization.R

+101 −32

File changed.

Preview size limit exceeded, changes collapsed.

Admin message