Commit 69131e39 authored by Ilya Korsunsky's avatar Ilya Korsunsky
Browse files

augmented effects.presto to summarize effects over multiple (potentially) nested variables

parent dc3a6ebe
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ export(nnzeroGroups)
export(plot_ma.presto)
export(plot_volcano.presto)
export(presto.presto)
export(query.presto)
export(rank_matrix)
export(sumGroups)
export(top_markers)
+14 −28
Original line number Diff line number Diff line
@@ -354,35 +354,21 @@ genemeans.presto <- function(object, xpm=1e6) {


#' @export 
effects.presto <- function(object, effect, effect_levels=NULL, within=NULL, within_levels=NULL) {
    ## TODO: allow nested effects with the `within` parameter
    ## TODO: create user facing accessor function and cache effects computation to not recompute p values
    
    if (is.null(effect_levels)) {
        effect_levels <- object$betanames_df %>% 
            subset(grpvar == effect) %>% 
            with(grp)
effects.presto <- function(object, effects) {
    effects_available <- unique(object$betanames_df$grpvar)
    if (any(!effects %in% effects_available)) {
        missing_vars <- paste(setdiff(effects, effects_available), collapse = ', ')
        stop(sprintf('missing variables in object: %s', missing_vars))
    }
    
    object$effects <- dplyr::inner_join(
        cbind(object$betanames_df, object$beta) %>% 
            subset(grpvar == effect) %>% 
            dplyr::select(-grpvar, -term) %>%
            tidyr::gather(feature, beta, -grp),
        cbind(object$betanames_df, object$sigma) %>% 
            subset(grpvar == effect) %>% 
            dplyr::select(-grpvar, -term) %>%
            tidyr::gather(feature, sigma, -grp),
        by = c('grp', 'feature')
    ) %>% 
        dplyr::mutate(
            wald = beta / sigma,
            pval = 2 * (1 - pnorm(abs(beta / sigma)))
        ) %>% 
        dplyr::mutate(fdr = p.adjust(pval, 'BH')) %>% 
        dplyr::left_join(object$log_mu)
        

    beta_tidy <- effects %>% purrr::map(.make_tidy_beta, object) %>% purrr::reduce(.merge_betas)
    sigma_tidy <- effects %>% purrr::map(.make_tidy_sigma, object) %>% purrr::reduce(.merge_sigmas)
    suppressMessages({
        object$effect <- dplyr::full_join(beta_tidy, sigma_tidy) %>% 
            dplyr::left_join(object$log_mu) %>% 
            dplyr::mutate(wald = beta/sigma, pval = 2 * (1 - pnorm(abs(beta/sigma)))) %>% 
            dplyr::mutate(fdr = p.adjust(pval, "BH"))
    })    
    return(object)
}

R/glmm_internal.R

0 → 100644
+59 −0
Original line number Diff line number Diff line
.make_tidy_beta <- function(varname, object) {
    res <- cbind(object$betanames_df, object$beta) %>% 
        subset(grpvar == varname) %>% 
        dplyr::select(-grpvar) %>% 
        tidyr::gather(feature, beta, -grp, -term) %>% 
        identity()
    
    if (grepl(':', varname)) {
        ## nested variable
        res <- tidyr::separate(res, grp, unlist(strsplit(varname, ':')), sep = ':')        
    } else {
        ## not nested
        res[[varname]] <- res$grp
        res$grp <- NULL
    } 

    return(res)
}

.make_tidy_sigma <- function(varname, object) {
    res <- cbind(object$betanames_df, object$sigma) %>% 
        subset(grpvar == varname) %>% 
        dplyr::select(-grpvar) %>% 
        tidyr::gather(feature, sigma, -grp, -term) %>% 
        identity()
    
    if (grepl(':', varname)) {
        ## nested variable
        res <- tidyr::separate(res, grp, unlist(strsplit(varname, ':')), sep = ':')        
    } else {
        ## not nested
        res[[varname]] <- res$grp
        res$grp <- NULL
    } 

    return(res)
}


.merge_betas <- function(X1, X2) {
    terms_join <- setdiff(intersect(colnames(X1), colnames(X2)), 'beta')
    suppressMessages({
        dplyr::full_join(X1, X2, by = terms_join) %>% 
            dplyr::rowwise() %>% 
            dplyr::mutate(beta = sum(beta.x, beta.y, na.rm = TRUE)) %>% 
            dplyr::select(-beta.x, -beta.y)    
    })
}


.merge_sigmas <- function(X1, X2) {
    terms_join <- setdiff(intersect(colnames(X1), colnames(X2)), 'sigma')
    suppressMessages({
    dplyr::full_join(X1, X2, by = terms_join) %>% 
        dplyr::rowwise() %>% 
        dplyr::mutate(sigma = sqrt(sum(sigma.x ^ 2, sigma.y ^ 2, na.rm = TRUE))) %>% 
        dplyr::select(-sigma.x, -sigma.y)
    })
}