Commit 510a37ab authored by tekath's avatar tekath
Browse files

Updated posthoc and dtu table creation.

Add ability to specify columns to add.
parent b8759a68
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ export(get_by_partition)
export(import_counts)
export(import_gtf)
export(move_columns_to_front)
export(plot_dtu_table)
export(posthoc_and_stager)
export(rm_version)
export(run_drimseq)
+29 −25
Original line number Diff line number Diff line
@@ -83,7 +83,7 @@ import_counts <- function(files, type, ...){
#' @param tx_list List of sparse transcription count matrices, as returned by `import_counts()` for single-cell data.
#' @param cell_extensions Optional list of cellname extensions that are added to the cellnames of one sample. The cellnames and the extension are separated by an underscore '_'.
#' @param seurat_obj Optional seurat object, where the combined matrix is added as an assay. This has the advantage, that the cells are matched and subsetted if necessary. Currently only Seurat 3 objects are supported.
#' @param tx2gene Optional tx2gene/metadata data frame, which is added as feature-level meta data to the created assay. The first column of the data frame must contain transcript names/ids. The same transcript names/ids as in the `tx_list` objects must be used.
#' @param tx2gene Optional tx2gene/metadata data frame, can only be used in conjunction with a seurat object. Metadata is added as feature-level meta data to the created assay. The first column of the data frame must contain transcript names/ids. The same transcript names/ids as in the `tx_list` objects must be used.
#' @param assay_name If the combined matrix should be added to an existing Seurat object, the name of the assay can be specified here.
#'
#' @return Either a combined sparse transcription count matrix or a seurat object which the  combined sparse transcription count matrix as an assay.
@@ -247,21 +247,25 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
        assertthat::assert_that(packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
        assertthat::assert_that(counts@version>="3.0.0", msg = "The provided 'counts' is not a Seurat 3 object. Currently only Seurat 3 objects are supported.")
        if(is.vector(tx2gene)){
            if(ncol(counts[[counts@active.assay]]@meta.features)>1){
            meta_df <- counts[[counts@active.assay]]@meta.features
            assertthat::assert_that(ncol(meta_df)>1, msg="No feature-level meta data in active assay of seurat object. Was 'tx2gene' provided in 'combine_to_matrix()'?\nAlternatively provide a real tx2gene dataframe.")
            assertthat::assert_that(all(tx2gene %in% colnames(meta_df)), msg = "Not all provided 'tx2gene' colnames are present in the feature-level meta data.")
            tx2gene <-  counts[[counts@active.assay]]@meta.features[,tx2gene]
            }else{
                stop("No feature-level meta data in seurat object. Was 'tx2gene' provided in 'combine_to_matrix()'?\nAlternatively provide a real tx2gene dataframe.")
            }
        }
        counts <- GetAssayData(counts)
    }
    assertthat::assert_that(is(counts, "matrix")|is(counts, "sparseMatrix"))
    assertthat::assert_that(is(tx2gene, "data.frame"))
    assertthat::assert_that(is(pd, "data.frame"))
    assertthat::assert_that(ncol(tx2gene)>1)
    assertthat::assert_that(cond_col %in% colnames(pd), msg = paste0("Could not find", cond_col, " in colnames of pd."))
    assertthat::assert_that(filtering_strategy %in% c("bulk", "sc", "own"), msg = "Please select a valid filtering strategy ('bulk', 'sc' or 'own').")
    assertthat::assert_that(is(BPPARAM, "BiocParallelParam"), msg = "Please provide a valid BiocParallelParam object.")
    assertthat::assert_that(is.logical(force_dense))
    assertthat::assert_that(is.logical(carry_over_metadata))
    assertthat::assert_that(all(rownames(counts) %in% tx2gene[[1]]), msg = "Not all rownames of the counts are present in the first tx2gene column. You may want to reorder the tx2gene columns with 'move_columns_to_front()'.")

    tx2gene <- rapply(tx2gene, as.character, classes="factor", how="replace")
    tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
    assertthat::assert_that(nrow(tx2gene)==nrow(counts))
    colnames(tx2gene)[c(1,2)] <- c("feature_id", "gene_id")
@@ -320,8 +324,9 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
    own={
        filter_opt_list <- modifyList(filter_opt_list, list(...))
    })
    counts <- do.call(sparse_filter, args = c(list(counts = counts, tx2gene = tx2gene, BPPARAM = BPPARAM),
                                              filter_opt_list))
    tictoc::tic("Filter")
    counts <- do.call(sparse_filter, args = c(list("counts" = counts, "tx2gene" = tx2gene, "BPPARAM" = BPPARAM), filter_opt_list), quote=TRUE)
    tictoc::toc(log = T)
    tx2gene <- tx2gene[match(rownames(counts), tx2gene$feature_id),]

    if(is(counts, 'sparseMatrix')&force_dense){
@@ -336,15 +341,13 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
                     format(structure(as.double(nrow(counts))*as.double(ncol(counts))*8, class="object_size"), units="auto"), " of memory.")
            })
    }
    # counts <- data.frame(tx2gene, counts, row.names = NULL, stringsAsFactors = F)
    #
    # drim <- DRIMSeq::dmDSdata(counts = counts, samples = samp)

    drim <- sparseDRIMSeq::sparse_dmDSdata(tx2gene = tx2gene, counts = counts, samples = samp)

    exp_in_tx <- ratio_expression_in(drim, "tx")
    exp_in_gn <- ratio_expression_in(drim, "gene")

    tictoc::tic("Ratios")
    exp_in_tx <- ratio_expression_in(drim, "tx", BPPARAM=BPPARAM)
    exp_in_gn <- ratio_expression_in(drim, "gene", BPPARAM=BPPARAM)
    tictoc::toc(log = T)
    tictoc::tic("Metadata")
    #carry over metadata
    if(carry_over_metadata&ncol(tx2gene)>2){
        temp <- tx2gene[match(rownames(exp_in_tx), tx2gene$feature_id),]
@@ -353,13 +356,12 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
            exp_in_tx <- cbind(exp_in_tx, temp[,-c(1,2)])
            add_to_gene_columns <- check_unique_by_partition(temp[,-c(1,2)], drim@counts@partitioning)
            if(!is.null(add_to_gene_columns)){
                temp <- get_by_partition(df = temp, columns =add_to_gene_columns, partitioning = drim@counts@partitioning, FUN=unique)
                temp <- get_by_partition(df = temp, columns =add_to_gene_columns, partitioning = drim@counts@partitioning, FUN=unique, BPPARAM = BPPARAM)
                exp_in_gn <- cbind(exp_in_gn, temp[match(rownames(exp_in_gn), temp[[1]]),-c(1)])
            }
        }
    }


    tictoc::toc(log = T)
    design_full <- model.matrix(~condition, data=samp)

    tictoc::toc(log = T)
@@ -385,6 +387,7 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=


#TODO: implement Noise - IQR filtering
#TODO: Posthoc!

#' Posthoc filtering and two-staged statistical tests
#'
@@ -399,13 +402,13 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
#' @param ofdr Overall false discovery rate (OFDR) threshold.
#' @param posthoc_filt Specify the minimal standard deviation of a transcripts porportion level that should be kept when performing posthoc filtering. To disbale poshoc filtering 0 or `FALSE` can be provided.
#' @return An extended `dturtle` object. Additional slots include:
#' - `DE_gene`:
#' - `DE_tx` :
#' - `FDR_table` :
#' - `DE_gene`: A character vector of all genes where the first stageR step was significant. Basically the significant genes, that showed signs of DTU.
#' - `DE_tx` : A named character vector of transcripts where the second stageR step was significant. Basically the significant transcripts of the significant genes.
#' - `FDR_table` : A data frame of the stage-wise adjusted p-values for all genes/transcripts. Might contain NA-values, as transcript level p-values are not avaible when the gene level test was not significant.
#'
#' @family DTUrtle
#' @export
#' @seealso [run_drimseq()] for DTU object creation. [create_dtu_table()] for result visualization.
#' @seealso [run_drimseq()] for DTU object creation. [create_dtu_table()] for result table creation.
#'
#' @examples
posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
@@ -417,8 +420,8 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
    }
    assertthat::assert_that(0<=ofdr & ofdr<=1, msg = "The provided 'ofdr' parameter is invalid. Must be a number between [0,1].")

    res <- DRIMSeq::results(dturtle$drim)
    res_txp <- DRIMSeq::results(dturtle$drim, level="feature")
    res <- sparseDRIMSeq::results(dturtle$drim)
    res_txp <- sparseDRIMSeq::results(dturtle$drim, level="feature")

    if(posthoc!=F|posthoc>0){
        res_txp <- run_posthoc(dturtle$drim, posthoc)
@@ -438,7 +441,8 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
    DE_gene <- unique(as.character(fdr_table$geneID[fdr_table$gene<ofdr]))

    if(length(DE_gene)>0){
        DE_tx <- fdr_table$txID[fdr_table$gene<ofdr&fdr_table$transcript<ofdr]
        temp <- fdr_table[fdr_table$gene<ofdr&fdr_table$transcript<ofdr,]
        DE_tx <- setNames(as.character(temp$txID), temp$geneID)
        message("Found ",length(DE_gene)," significant genes with ",length(DE_tx)," significant transcripts (OFDR: ",ofdr,")")
    }else{
        DE_gene <- NULL
+3 −228
Original line number Diff line number Diff line
@@ -6,20 +6,20 @@
#'
#' @param counts Sparse count matrix.
#' @param tx2gene Feature to gene mapping.
#' @inheritParams DRIMSeq::dmFilter
#' @param BPPARAM If multicore processing should be used, specify a `BiocParallelParam` object here. Among others, can be `SerialParam()` (default) for standard non-multicore processing or `MulticoreParam('number_cores')` for multicore processing. See \code{\link[BiocParallel:BiocParallel-package]{BiocParallel}} for more information.
#' @inheritParams sparseDRIMSeq::dmFilter
#'
#' @return Filtered sparse matrix
sparse_filter <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(), min_samps_gene_expr = 0,
                          min_gene_expr = 0, min_samps_feature_expr = 0, min_feature_expr = 0,
                          min_samps_feature_prop = 0, min_feature_prop = 0,
                          run_gene_twice=FALSE){

    assertthat::assert_that(is(counts, "matrix")|is(counts, "sparseMatrix"))
    counts <- counts[Matrix::rowSums(counts)>0,]
    tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
    #genes with at least two transcripts
    inds <- which(duplicated(tx2gene[[2]]) | duplicated(tx2gene[[2]], fromLast = TRUE))
    inds <- split(inds, f = tx2gene[inds,2])

    filter <- function(row_index){
        expr_features <- counts[row_index,]

@@ -71,231 +71,6 @@ sparse_filter <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(),

    counts_new <- BiocParallel::bplapply(inds, FUN=filter, BPPARAM=BPPARAM)
    counts_new <- counts[unlist(counts_new),]

    assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
    message("Retain ",nrow(counts_new), " of ",nrow(counts)," features.\nRemoved ", nrow(counts)-nrow(counts_new), " features.")
    return(counts_new)
}




sparse_filter_naive <- function(counts, tx2gene, min_samps_gene_expr = 0,
                                min_gene_expr = 0, min_samps_feature_expr = 0, min_feature_expr = 0,
                                min_samps_feature_prop = 0, min_feature_prop = 0,
                                run_gene_twice=FALSE){
    counts <- counts[Matrix::rowSums(counts)>0,]
    tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
    #genes with at least two transcripts
    inds <- unique(tx2gene[[2]][duplicated(tx2gene[[2]])])
    inds <- lapply(inds, FUN=function(x) counts[tx2gene[[1]][tx2gene[[2]]==x],])

    counts_new <- NULL
    for(expr_features in inds){
        ### no genes with no expression
        if(sum(expr_features, na.rm = TRUE) == 0)
            next()

        ### genes with min expression
        if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
           min_samps_gene_expr )
            next()

        ### no features with no expression
        features2keep <- Matrix::rowSums(expr_features > 0, na.rm = TRUE) > 0

        ### no genes with one feature
        if(sum(features2keep) <= 1)
            next()

        expr_features <- expr_features[features2keep, , drop = FALSE]

        ### features with min expression
        features2keep <- Matrix::rowSums(expr_features >= min_feature_expr, na.rm = TRUE) >=
            min_samps_feature_expr

        ### no genes with one feature
        if(sum(features2keep) <= 1)
            next()

        expr_features <- expr_features[features2keep, , drop = FALSE]

        ### genes with zero expression
        samps2keep <- Matrix::colSums(expr_features) > 0 & !is.na(expr_features[1, ])

        if(sum(samps2keep) < max(1, min_samps_feature_prop))
            next()

        temp <- expr_features[, samps2keep, drop = FALSE]
        prop <- sweep(temp, 2, Matrix::colSums(temp), "/")

        ### features with min proportion
        features2keep <- Matrix::rowSums(prop >= min_feature_prop) >= min_samps_feature_prop

        ### no genes with one feature
        if(sum(features2keep) <= 1)
            next()

        expr <- expr_features[features2keep, , drop = FALSE]

        if (run_gene_twice) {
            ### no genes with no expression
            if(sum(expr_features, na.rm = TRUE) == 0)
                next()

            ### genes with min expression
            if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
               min_samps_gene_expr )
                next()
        }
        counts_new <- rbind(counts_new, expr)
    }
    assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
    message("Retain ",nrow(counts_new), " of ",nrow(counts)," features.\nRemoved ", nrow(counts)-nrow(counts_new), " features.")
    return(counts_new)
}

sparse_filter_naive_parallel <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(), min_samps_gene_expr = 0,
                                         min_gene_expr = 0, min_samps_feature_expr = 0, min_feature_expr = 0,
                                         min_samps_feature_prop = 0, min_feature_prop = 0,
                                         run_gene_twice=FALSE){


    tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
    #genes with at least two transcripts
    inds <- unique(tx2gene[[2]][duplicated(tx2gene[[2]])])
    inds <- lapply(inds, FUN=function(x) counts[tx2gene[[1]][tx2gene[[2]]==x],])

    filter <- function(expr_features){
        ### no genes with no expression
        if(sum(expr_features, na.rm = TRUE) == 0)
            return(NULL)

        ### genes with min expression
        if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
           min_samps_gene_expr )
            return(NULL)

        ### no features with no expression
        features2keep <- Matrix::rowSums(expr_features > 0, na.rm = TRUE) > 0

        ### no genes with one feature
        if(sum(features2keep) <= 1)
            return(NULL)

        expr_features <- expr_features[features2keep, , drop = FALSE]

        ### features with min expression
        features2keep <- Matrix::rowSums(expr_features >= min_feature_expr, na.rm = TRUE) >=
            min_samps_feature_expr

        ### no genes with one feature
        if(sum(features2keep) <= 1)
            return(NULL)

        expr_features <- expr_features[features2keep, , drop = FALSE]

        ### genes with zero expression
        samps2keep <- Matrix::colSums(expr_features) > 0 & !is.na(expr_features[1, ])

        if(sum(samps2keep) < max(1, min_samps_feature_prop))
            return(NULL)

        temp <- expr_features[, samps2keep, drop = FALSE]
        prop <- sweep(temp, 2, Matrix::colSums(temp), "/")

        ### features with min proportion
        features2keep <- Matrix::rowSums(prop >= min_feature_prop) >= min_samps_feature_prop

        ### no genes with one feature
        if(sum(features2keep) <= 1)
            return(NULL)

        expr <- expr_features[features2keep, , drop = FALSE]

        if (run_gene_twice) {
            ### no genes with no expression
            if(sum(expr_features, na.rm = TRUE) == 0)
                return(NULL)

            ### genes with min expression
            if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
               min_samps_gene_expr )
                return(NULL)
        }
        return(expr)
    }

    counts_new <- BiocParallel::bplapply(inds, FUN=filter, BPPARAM=BPPARAM)
    counts_new <- do.call(rbind, counts_new)

    assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
    return(counts_new)
}

sparse_filter_index <- function(counts, tx2gene, min_samps_gene_expr = 0,
                                min_gene_expr = 0, min_samps_feature_expr = 0, min_feature_expr = 0,
                                min_samps_feature_prop = 0, min_feature_prop = 0,
                                run_gene_twice=FALSE){

    counts <- counts[Matrix::rowSums(counts)>0,]
    tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),]
    #genes with at least two transcripts
    inds <- which(duplicated(tx2gene[[2]]) | duplicated(tx2gene[[2]], fromLast = TRUE))
    inds <- split(inds, f = tx2gene[inds,2])

    filter <- function(row_index){
        expr_features <- counts[row_index,]

        ### genes with min expression
        if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
           min_samps_gene_expr )
            return(NULL)

        ### features with min expression
        row_index <- Matrix::rowSums(expr_features >= min_feature_expr, na.rm = TRUE) >=
            min_samps_feature_expr

        ### no genes with one feature
        if(sum(row_index) <= 1)
            return(NULL)

        expr_features <- expr_features[row_index, , drop = FALSE]

        ### genes with zero expression
        samps2keep <- Matrix::colSums(expr_features) > 0 & !is.na(expr_features[1, ])

        if(sum(samps2keep) < max(1, min_samps_feature_prop))
            return(NULL)

        temp <- expr_features[, samps2keep, drop = FALSE]
        prop <- sweep(temp, 2, Matrix::colSums(temp), "/")

        ### features with min proportion
        row_index <- Matrix::rowSums(prop >= min_feature_prop) >= min_samps_feature_prop

        ### no genes with one feature
        if(sum(row_index) <= 1)
            return(NULL)

        expr <- expr_features[row_index, , drop = FALSE]

        if (run_gene_twice) {
            ### no genes with no expression
            if(sum(expr_features, na.rm = TRUE) == 0)
                return(NULL)

            ### genes with min expression
            if(! sum(Matrix::colSums(expr_features) >= min_gene_expr, na.rm = TRUE) >=
               min_samps_gene_expr )
                return(NULL)
        }
        return(rownames(expr))
    }

    counts_new <- lapply(inds, FUN=filter)
    counts_new <- counts[unlist(counts_new),]

    assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
    message("Retain ",nrow(counts_new), " of ",nrow(counts)," features.\nRemoved ", nrow(counts)-nrow(counts_new), " features.")
    return(counts_new)
+23 −25
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ no_na <- function(x){
#' @return
smallProportionSD <- function(d, filter) {
    browser()
    d <- DRIMSeq::counts(d)
    d <- sparseDRIMSeq::counts(d)
    cts <- as.matrix(d[,!colnames(d) %in% c("gene_id", "feature_id")])
    gene.cts <- rowsum(cts, d$gene_id)
    total.cts <- gene.cts[match(d$gene_id, rownames(gene.cts)),]
@@ -87,9 +87,9 @@ seurat_add_tx2gene <- function(seurat_obj, tx2gene){
#' @param drim
#' @param filt Threshold
#'
#' @return A filtered `DRIMSeq::results()` data frame.
#' @return A filtered `sparseDRIMSeq::results()` data frame.
run_posthoc <- function(drim, filt){
    res_txp_filt <- DRIMSeq::results(drim, level="feature")
    res_txp_filt <- sparseDRIMSeq::results(drim, level="feature")
    filt <- smallProportionSD(drim, filt)
    res_txp_filt$pvalue[filt] <- 1
    res_txp_filt$adj_pvalue[filt] <- 1
@@ -112,23 +112,19 @@ get_diff <- function(gID, dturtle){
    return(y)
}

#' Add a column specifying the maximal difference between the two comparison groups

#' Return the maximal absolute difference for all transcripts of the procvided gene
#'
#' @param dtu_table The dtu data frame where the column shall be added.
#' @param dturtle The corresponding dturtle object the `dtu_table` originates from.
#' @param gID gene-identifier
#' @param dturtle dturtle object
#'
#' @return A dtu data frame with the added column.
add_max_delta <- function(dtu_table, dturtle){
    getmax <- function(gID){
#' @return The maximal absolute difference value
getmax <- function(gID, dturtle){
    y <- get_diff(gID, dturtle)
    #get absoulte maximum while preserving sign
    return(y$diff[which.max(abs(y$diff))])
}

    dtu_table[[paste0("max(",levels(dturtle$group)[1], "-",levels(dturtle$group)[2],")")]] <- as.numeric(sapply(dtu_table$geneID, FUN = getmax))
    return(dtu_table)
}


#' Parse a gtf file and return a transcript level dataframe.
#'
@@ -192,11 +188,11 @@ rm_version <- function(x){
#' @param type Type of the summarization that shall be performed. Options are:
#' - `'tx'`: Transcript-level expressed-in ratios.
#' - `'gene'`: Gene-level expressed-in ratios.
#'
#' @param BPPARAM If multicore processing should be used, specify a `BiocParallelParam` object here. Among others, can be `SerialParam()` (default) for standard non-multicore processing or `MulticoreParam('number_cores')` for multicore processing. See \code{\link[BiocParallel:BiocParallel-package]{BiocParallel}} for more information.
#' @return Data frame with the expressed-in ratios.
#'
#' @examples
ratio_expression_in <- function(drim, type){
ratio_expression_in <- function(drim, type, BPPARAM=BiocParallel::SerialParam()){
    assertthat::assert_that(type %in% c("tx", "gene"))
    part <- drim@counts@partitioning
    data <- drim@counts@unlistData
@@ -209,19 +205,19 @@ ratio_expression_in <- function(drim, type){
        ret <- data.frame(rep(names(part), lengths(part)),
                          rownames(data),
                          Matrix::rowSums(data!=0)/ncol(data),
                          sapply(cond, function(x){
                          BiocParallel::bplapply(cond, FUN = function(x){
                              group_data = data[,drim@samples$sample_id[drim@samples$condition==x],drop=F]
                              return(Matrix::rowSums(group_data!=0)/ncol(group_data))
                          }), stringsAsFactors = F)
                          }, BPPARAM = BPPARAM), stringsAsFactors = F)
        colnames(ret) <- c("gene","tx","exp_in",paste0("exp_in_",cond))
    }else{
        data <-  t(sapply(part, function(x) Matrix::colSums(data[x,,drop=F])))
        data <-  t(sapply(part, FUN = function(x) Matrix::colSums(data[x,,drop=F])))
        ret <- data.frame(rownames(data),
                          Matrix::rowSums(data!=0)/ncol(data),
                          sapply(cond, function(x){
                          BiocParallel::bplapply(cond, FUN = function(x){
                              group_data = data[,drim@samples$sample_id[drim@samples$condition==x],drop=F]
                              return(Matrix::rowSums(group_data!=0)/ncol(group_data))
                          }), stringsAsFactors = F)
                          }, BPPARAM = BPPARAM), stringsAsFactors = F)
        colnames(ret) <- c("gene","exp_in",paste0("exp_in_",cond))
    }
    return(ret)
@@ -271,17 +267,19 @@ check_unique_by_partition <- function(df, partitioning, columns=NULL){
#' @param FUN Aggregation function. Can be a base function like `unique`, `length`, etc., or a custom function.
#' @param columns Optional: Only aggregate the specified columns of `df`. Defaults to all columns.
#' @inheritParams stats::aggregate.data.frame
#' @param BPPARAM If multicore processing should be used, specify a `BiocParallelParam` object here. Among others, can be `SerialParam()` (default) for standard non-multicore processing or `MulticoreParam('number_cores')` for multicore processing. See \code{\link[BiocParallel:BiocParallel-package]{BiocParallel}} for more information.
#'
#' @return Data frame with Group column that specifies the partition and one column per specified column with aggregated values.
#' @export
get_by_partition <- function(df, partitioning, FUN, columns=NULL, simplify=T, drop=T){
get_by_partition <- function(df, partitioning, FUN, columns=NULL, simplify=T, drop=T, BPPARAM=BiocParallel::SerialParam()){
    assertthat::assert_that(is.data.frame(df))
    assertthat::assert_that(is.list(partitioning))
    assertthat::assert_that(is.function(FUN))
    assertthat::assert_that(is(BPPARAM, "BiocParallelParam"), msg = "Please provide a valid BiocParallelParam object.")
    if(!is.null(columns)){
        assertthat::assert_that(all(columns %in% colnames(df)))
        df <- df[,columns]
    }
    return(stats::aggregate(df, by=list(rep(names(partitioning), lengths(partitioning))), FUN=FUN,  simplify=simplify, drop=T))
    return(BiocParallel::bpaggregate(df, by=list(rep(names(partitioning), lengths(partitioning))), FUN=FUN,  simplify=simplify, drop=T, BPPARAM = BPPARAM))
}
+101 −32

File changed.

Preview size limit exceeded, changes collapsed.

Loading