initial commit of lme4 DGE models and meta analysis functions (b1a09bdd) · Commits · github_fork / Presto 20250701

DESCRIPTION

+2 −2

Original line number	Diff line number	Diff line
		@@ -25,8 +25,8 @@ Imports:
		rlang,
		stats,
		utils,
		DESeq2
		RoxygenNote: 6.1.1
		arm
		RoxygenNote: 7.0.2
		Suggests:
		knitr,
		rmarkdown,

NAMESPACE

+10 −5

Original line number	Diff line number	Diff line
		@@ -4,6 +4,7 @@ S3method(nnzeroGroups,dgCMatrix)
		S3method(nnzeroGroups,matrix)
		S3method(rank_matrix,dgCMatrix)
		S3method(rank_matrix,matrix)
		S3method(split,presto)
		S3method(sumGroups,dgCMatrix)
		S3method(sumGroups,matrix)
		S3method(wilcoxauc,Seurat)
		@@ -12,16 +13,20 @@ S3method(wilcoxauc,default)
		S3method(wilcoxauc,seurat)
		export("%>%")
		export(collapse_counts)
		export(combine.model)
		export(combine.models)
		export(combine.presto)
		export(compute_fixed_effects)
		export(compute_hash)
		export(find_markers_glmm)
		export(find_markers_glmm_single_gene)
		export(glmm.presto)
		export(map_dfr_rev)
		export(meta_analysis.presto)
		export(nnzeroGroups)
		export(pseudobulk_deseq2)
		export(pseudobulk_one_vs_all)
		export(pseudobulk_pairwise)
		export(rank_matrix)
		export(sumGroups)
		export(summarize_dge_pairs)
		export(top_markers)
		export(top_markers_dds)
		export(wilcoxauc)
		import(Rcpp)
		importClassesFrom(Matrix,TsparseMatrix)

R/glmm.R

0 → 100644

+71 −0

Original line number	Diff line number	Diff line
		#' @export
		find_markers_glmm_single_gene <- function(dge_formula, design, y, main_effect, nsim) {
		## Estimate model
		glmer_res <- lme4::glmer(
		dge_formula, cbind(design, y), 'poisson',
		control = lme4::glmerControl(
		calc.derivs=FALSE, ## 1.1X speedup
		optimizer="nloptwrap" ## 2X speedup
		)
		)

		res <- list()

		## save the learned variance components
		res$var <- as_tibble(lme4::VarCorr(glmer_res)) %>%
		dplyr::select(grpvar = grp, vcov)
		## save all random and fixed effects
		res$ranef <- lme4::ranef(glmer_res, condVar=TRUE) %>%
		as.data.frame() %>%
		dplyr::select(-term)
		res$fixef <- data.frame(beta = fixef(glmer_res)) %>%
		tibble::rownames_to_column('effect')

		## get betas and SDs
		res$dge <- res$ranef %>%
		subset(grpvar == main_effect) %>%
		dplyr::select(group = grp, beta=condval) %>%
		dplyr::mutate(group = as.character(group)) ## undo factors
		res$dge$sigma <- as.numeric(apply(as.data.frame(arm::sim(glmer_res, n.sims=nsim)@ranef[main_effect]), 2, sd))

		return(res)
		}


		#' @export
		find_markers_glmm <- function(
		dge_formula, counts_mat, meta_data, main_effect, features=NULL, do_par=TRUE, nsim=100
		) {
		## TODO: check that main_effect is in formula
		if (do_par) {
		plan(multicore)
		it_fxn <- furrr::future_map
		} else {
		it_fxn <- purrr::map
		}
		if (is.null(features)) {
		features <- rownames(counts_mat$counts_mat)
		}

		## Get results for each gene
		lres <- it_fxn(features, function(feature_use) {
		find_markers_glmm_single_gene(
		dge_formula = dge_formula,
		design = meta_data,
		y = counts_mat[feature_use, ],
		main_effect,
		nsim
		)
		})
		names(lres) <- features

		# Then aggregate the three lists together
		common_el <- purrr::reduce(map(lres, names), intersect)
		res <- map(common_el, function(name) {
		map_dfr(lres, name, .id='feature')
		})
		names(res) <- common_el
		return(res)

		}

R/metaanalysis.R

0 → 100644

+220 −0

Original line number	Diff line number	Diff line
		#' split.presto
		#'
		#' Split presto object
		#'
		#' @param obj presto object
		#' @param splitvar split by this variable, defined in presto$meta_data
		#' @param .appendname (bool) whether to append name of obj to daughter object names
		#' @param .sep if .appendname is TRUE, sep character for appending
		#'
		#' @export
		split.presto <- function(obj, splitvar, .appendname = TRUE, .sep = '_') {
		meta_list <- pb$meta_data %>% split(pb$meta_data$Tissue)
		res <- map(names(meta_list), function(name) {
		if (.appendname) {
		newname <- paste(pb$name, name, sep = .sep)
		} else {
		newname <- name
		}

		list(
		name = newname,
		meta_data = meta_list[[name]],
		counts_mat = pb$counts_mat[, meta_list[[name]]$SampleID]
		)

		## TODO: if glmm has splitby variable, split those results too

		})

		names(res) <- names(meta_list)
		return(res)
		}

		#' glmm.presto
		#'
		#' Convenience wrapper to find_markers_glmm
		#'
		#' @param obj presto object
		#' @param dge_formula lme4 valid formula
		#' @param main_effect variable for which to simulate SD. Results stored in `$dge` (currently, must be random effect)
		#' @param analysis_name
		#' @param features
		#' @param do_par parallel will be done with furrr
		#' @param nsim number simulations to estimate SD of main_effect betas
		#' @param overwrite (bool) overwrite model object
		#'
		#' @export
		glmm.presto <- function(obj, dge_formula, main_effect, analysis_name, features=NULL, do_par=TRUE, nsim=100, overwrite=TRUE) {
		if (analysis_name %in% names(obj$models) & !overwrite) {
		stop(sprintf('glmm results \"%s\" already exists. Set overwrite=TRUE to replace', analysis_name))
		}
		obj$models[[analysis_name]] <- find_markers_glmm(
		dge_formula,
		obj$counts_mat,
		obj$meta_data,
		main_effect,
		features,
		do_par,
		nsim
		)

		return(obj)
		}



		#' map_dfr_rev
		#'
		#' @param x
		#' @param y
		#' @param bind_id
		#'
		#' @export
		map_dfr_rev <- function(y, x, bind_id) {
		## I don't think that purrr explicitly has this function
		map_dfr(x, y, .id = bind_id)
		}

		#' combine.model
		#'
		#' @param model_name
		#' @param models
		#' @param bind_id
		#'
		#' @export
		combine.model <- function(model_name, models, bind_id) {
		## functions to combine lme4 models
		model_list <- map(models, model_name)
		model_fields <- purrr::reduce(map(model_list, names), intersect)
		model_new <- map(model_fields, map_dfr_rev, model_list, bind_id)
		names(model_new) <- model_fields
		return(model_new)
		}

		#' combine.models
		#'
		#' Combine list of DGE models
		#'
		#' @param obj_list list of model objects
		#' @param bind_id column name to add to combined data_frames (passed to bind_rows .id parameter)
		#'
		#' @export
		combine.models <- function(obj_list, bind_id) {
		## get models from objects
		models <- map(obj_list, 'models')

		## loop over named models
		models_common <- purrr::reduce(map(models, names), intersect)
		res <- map(models_common, combine.model, models, bind_id)
		names(res) <- models_common
		return(res)
		}

		#' @export
		meta_analysis.presto <- function(models, type, key_names) {
		## TODO: check that feature and group are the same
		## TODO: instead of feature and group, make generic
		## TODO: check that key_names uniquely define rows
		res <- switch(
		type,
		'fixedeffects' = {
		## prepare data
		tests <- models[[1]] %>%
		tidyr::unite(key, all_of(key_names), sep = '_') %>%
		with(key)

		beta_mat <- models %>% map('beta') %>% dplyr::bind_cols(testid = tests) %>%
		tibble::column_to_rownames('testid') %>%
		as.matrix()

		sd_mat <- models %>% map('sigma') %>% dplyr::bind_cols(testid = tests) %>%
		tibble::column_to_rownames('testid') %>%
		as.matrix()

		## summary stats
		compute_fixed_effects(beta_mat, sd_mat) %>%
		tidyr::separate(test, key_names)

		},
		NULL ## default
		)
		if (is.null(res)) {
		stop(sprintf('meta analysis mode %s is not implemented', type))
		}
		return(res)
		}


		#' @export
		compute_fixed_effects <- function(B, S) {
		w <- 1 / (S^2)
		beta_fixed <- rowSums(w * B) / rowSums(w)
		se_fixed <- sqrt(1 / rowSums(w))

		Q_stat <- rowSums(w * (B - beta_fixed) ^ 2)
		I2 <- pmax(0, 100 * ((Q_stat - ncol(B) + 1) / Q_stat), na.rm = FALSE) ## if Q=0, I2 cannot be estimated
		Q_pval <- pchisq(Q_stat, ncol(B) - 1, lower.tail=FALSE)
		Q_fdr <- p.adjust(Q_pval, 'BH')


		wald_stat <- beta_fixed / se_fixed
		# wald_pval <- 2 * (1 - pt(abs(wald_stat), ncol(B) - 1)) ## t-test based p value: what's the right df?
		wald_pval <- 2 * (1 - pnorm(abs(wald_stat))) ## z-score based p value
		wald_fdr <- p.adjust(wald_pval, 'BH')

		res <- tibble(
		test = rownames(B),
		beta = beta_fixed, se = se_fixed,
		wald_stat, wald_pval, wald_fdr,
		Q_stat, I2, Q_pval, Q_fdr
		)

		return(res)
		}



		#' @export
		combine.presto <- function(
		obj_list,
		obj_name,
		bind_id,
		meta_analysis=list(NULL, 'fixedeffects')[[1]],
		meta_analysis_model=NULL,
		meta_analysis_keynames=NULL,
		combine_models=TRUE,
		...
		) {
		## Setup names
		obj <- list()
		obj$name <- obj_name
		obj$meta_analyses <- list()
		names(obj_list) <- map(obj_list, 'name')
		if (bind_id %in% colnames(obj_list[[1]]$meta_data)) {
		obj$meta_data <- map(obj_list, 'meta_data') %>% dplyr::bind_rows(.id=NULL)
		} else {
		obj$meta_data <- map(obj_list, 'meta_data') %>% dplyr::bind_rows(.id=bind_id)
		}
		obj$counts_mat <- map(obj_list, 'counts_mat') %>% purrr::reduce(Matrix::cbind2)

		## Bind models
		## TODO: implement combining models (with bind_rows)
		if (combine_models) {
		obj$models <- combine.models(obj_list, bind_id)
		}

		## Do meta analyses
		if (!is.null(meta_analysis)) {
		obj$meta_analyses[[meta_analysis_model]] <- obj_list %>%
		map('models') %>%
		map(meta_analysis_model) %>%
		map('dge') %>%
		meta_analysis.presto(meta_analysis, meta_analysis_keynames)
		}
		return(obj)
		}

R/pseudobulk.R

+193 −193

File changed.

Preview size limit exceeded, changes collapsed.

Admin message