first commit of functions for cached lme4 matrix results and analyses (594a67fb) · Commits · github_fork / Presto 20250701

NAMESPACE

+6 −10

Original line number	Diff line number	Diff line
		@@ -4,7 +4,6 @@ S3method(nnzeroGroups,dgCMatrix)
		S3method(nnzeroGroups,matrix)
		S3method(rank_matrix,dgCMatrix)
		S3method(rank_matrix,matrix)
		S3method(split,presto)
		S3method(sumGroups,dgCMatrix)
		S3method(sumGroups,matrix)
		S3method(wilcoxauc,Seurat)
		@@ -12,17 +11,14 @@ S3method(wilcoxauc,SingleCellExperiment)
		S3method(wilcoxauc,default)
		S3method(wilcoxauc,seurat)
		export("%>%")
		export(I2.presto)
		export(collapse_counts)
		export(combine.model)
		export(combine.models)
		export(combine.presto)
		export(compute_fixed_effects)
		export(compute_I2)
		export(compute_hash)
		export(find_markers_glmm)
		export(find_markers_glmm_single_gene)
		export(glmm.presto)
		export(map_dfr_rev)
		export(meta_analysis.presto)
		export(correct_counts)
		export(glmm_multi)
		export(glmm_uni)
		export(nlopt)
		export(nnzeroGroups)
		export(rank_matrix)
		export(sumGroups)

R/glmm.R

+313 −102

Original line number	Diff line number	Diff line
		#' @export
		regress_out_one_gene <- function(formula_full, formula_reduced, design, y, common_nUMI) {
		tryCatch({
		glmer_res <- lme4::glmer(
		formula_full, cbind(design, y), "poisson",
		control = lme4::glmerControl(calc.derivs = FALSE, optimizer = "nloptwrap")
		)

		## simulate all samples to the same read depth
		design$logUMI <- log(common_nUMI)
		ypred <- lme4:::predict.merMod(
		object = glmer_res,
		newdata = design,
		type = 'response',
		re.form = formula_reduced
		nlopt <- function(par, fn, lower, upper, control) {
		.nloptr <<- res <- nloptr(par, fn, lb = lower, ub = upper,
		opts = list(algorithm = "NLOPT_LN_BOBYQA", print_level = 1,
		maxeval = 1000, xtol_abs = 1e-6, ftol_abs = 1e-6))
		list(par = res$solution,
		fval = res$objective,
		conv = if (res$status > 0) 0 else res$status,
		message = res$message
		)
		yresid <- residuals(glmer_res, type = 'response')
		return(ypred + exp(yresid))
		}, error = function(e) {
		return(rep(NA, length(y)))
		})
		}


		#' @export
		regress_out.presto <- function(obj, formula_full, formula_reduced, features = NULL, do_par = TRUE, common_nUMI=1e6) {
		## TODO: check that formula_reduced only has RHS
		glmm_uni <- function(feature, model_base, response, nsim) {
		tryCatch({
		model <- lme4::refit(model_base, response[feature, ])
		## residuals
		epsilon <- as.numeric(residuals(model, 'response'))
		epsilon_pearson <- as.numeric(residuals(model, 'pearson'))

		if (do_par) {
		plan(multicore)
		it_fxn <- furrr::future_map
		}
		else {
		it_fxn <- purrr::map
		}
		if (is.null(features)) {
		features <- rownames(obj$counts_mat)
		}
		lres <- it_fxn(features, function(feature_use) {
		regress_out_one_gene(
		formula_full = formula_full,
		formula_reduced = formula_reduced,
		design = obj$meta_data,
		y = obj$counts_mat[feature_use, ],
		common_nUMI
		## beta for fixed and meta effects
		beta <- c(
		1, ## for offset
		as.numeric(fixef(model)), ## fixed effects
		as.numeric(as.data.frame(ranef(model))$condval) ## random effects
		)
		})

		obj$corr_mat <- Reduce(Matrix::rbind2, lres)
		rownames(obj$corr_mat) <- features
		colnames(obj$corr_mat) <- colnames(obj$counts_mat)
		return(obj)
		}



		#' @export
		find_markers_glmm_single_gene <- function(dge_formula, design, y, main_effect, nsim) {
		## Estimate model
		tryCatch({
		glmer_res <- lme4::glmer(
		dge_formula, cbind(design, y), 'poisson',
		control = lme4::glmerControl(
		calc.derivs=FALSE, ## 1.1X speedup
		optimizer="nloptwrap" ## 2X speedup
		)
		## sigma for fixed and random effects
		sigma <- c(
		0, ## for offset
		sqrt(diag(vcov(model))), ## fixed effects
		as.numeric(apply(as.data.frame(arm::sim(model, n.sims = nsim)@ranef), 2, sd)) ## random effects
		)

		res <- list()

		## save the learned variance components
		res$var <- as_tibble(lme4::VarCorr(glmer_res)) %>%
		dplyr::select(grpvar = grp, vcov)
		## save all random and fixed effects
		res$ranef <- lme4::ranef(glmer_res, condVar=TRUE) %>%
		as.data.frame() %>%
		dplyr::select(-term)
		res$fixef <- data.frame(beta = lme4::fixef(glmer_res)) %>%
		tibble::rownames_to_column('effect')

		## get betas and SDs
		res$dge <- res$ranef %>%
		subset(grpvar == main_effect) %>%
		dplyr::select(group = grp, beta=condval) %>%
		dplyr::mutate(group = as.character(group)) ## undo factors
		res$dge$sigma <- as.numeric(apply(as.data.frame(arm::sim(glmer_res, n.sims=nsim)@ranef[main_effect]), 2, sd))
		## prior distributions for random effects
		## CAUTION: Residuals are dropped for lmer without as.data.frame
		prior_sd <- as.numeric(as.data.frame(VarCorr(model))$vcov)
		# prior_sd <- as.numeric(VarCorr(model))
		if (isGLMM(model)) {
		## for GLMMs, use SD of pearson residuals
		prior_sd <- c(prior_sd, sd(residuals(model, 'pearson')))
		}

		return(res)
		return(list(beta = beta, sigma = sigma, epsilon = epsilon, epsilon_pearson = epsilon_pearson, prior_sd = prior_sd))
		}, error = function(e) {
		print(e)
		return(NA)
		})
		}


		#' @export
		find_markers_glmm <- function(
		dge_formula, counts_mat, meta_data, main_effect, features=NULL, do_par=TRUE, nsim=100
		) {
		## TODO: check that main_effect is in formula
		if (do_par) {
		plan(multicore)
		glmm_multi <- function(formula, design, response, features=NULL, parallel=FALSE, nsim=100, family='poisson') {
		if (parallel) {
		furrr::plan(multicore)
		it_fxn <- furrr::future_map
		} else {
		it_fxn <- purrr::map
		}
		if (is.null(features)) {
		features <- rownames(counts_mat)
		features <- rownames(response)
		}

		## Get results for each gene
		lres <- it_fxn(features, function(feature_use) {
		find_markers_glmm_single_gene(
		dge_formula = dge_formula,
		design = meta_data,
		y = counts_mat[feature_use, ],
		main_effect,
		nsim
		## initialize model on one feature
		if (family == 'nb') {
		model_base <- lme4::glmer.nb(
		formula, cbind(design, y = response[features[[1]], ]),
		control = lme4::glmerControl(calc.derivs = FALSE, optimizer = "nloptwrap")
		)
		})
		names(lres) <- features
		} else if (family == 'poisson') {
		model_base <- lme4::glmer(
		formula, cbind(design, y = response[features[[1]], ]), family,
		control = lme4::glmerControl(calc.derivs = FALSE, optimizer = "nloptwrap")
		)
		} else if (family == 'gaussian') {
		model_base <- lme4::lmer(
		formula, cbind(design, y = response[features[[1]], ]),
		control = lmerControl(optimizer = "nloptwrap", calc.derivs = FALSE)
		)
		} else {
		stop(sprintf('(G)LMM family `%s` not supported', family))
		}
		priornames_df <- as.data.frame(VarCorr(model_base))[, 1:3]
		if (isGLMM(model_base)) {
		## glmer does not include residuals in VarCorr, lmer does
		priornames_df <- rbind(priornames_df, tibble(grp = 'Residual', var1 = NA, var2 = NA))
		}

		## in case some glmer models failed
		betanames_df <- list(
		tibble(grpvar = 'OFFSET', term = 'OFFSET', grp = 'OFFSET'),
		tibble(grpvar = names(fixef(model_base)), term = grpvar, grp = grpvar),
		as.data.frame(ranef(model_base), stringsAsFactors = FALSE)[, 1:3]
		) %>%
		bind_rows()

		## then refit with new responses
		lres <- it_fxn(features, glmm_uni, model_base, response, nsim)
		names(lres) <- features
		lres <- lres[!is.na(lres)]

		# Then aggregate the three lists together
		# Aggregate results
		common_el <- purrr::reduce(map(lres, names), intersect)
		res <- map(common_el, function(name) {
		map_dfr(lres, name, .id='feature')
		as.matrix(map_dfr(lres, name))
		})
		names(res) <- common_el

		## remember things names
		res$betanames_df <- betanames_df
		res$priornames_df <- priornames_df
		res$meta_data <- design
		res$design <- list(model_base@resp$offset, t(model_base@pp$X), model_base@pp$Zt) %>%
		purrr::reduce(Matrix::rbind2)
		res$response <- response[names(lres), ]

		return(res)
		}



		#' @export
		correct_counts <- function(object, effects_remove, umi_common, verbose=0) {
		if (missing(umi_common)) {
		umi_common <- exp(mean(object$meta_data$logUMI))
		}

		## always remove read depth (offset or fixed)
		effects_remove <- union(effects_remove, c('OFFSET', 'logUMI') )
		idx_keep <- object$betanames_df %>%
		tibble::rowid_to_column('idx') %>%
		subset(!grepl(paste(effects_remove, collapse = '\|'), grpvar)) %>%
		with(idx)

		if (verbose > 0) {
		message('remove')
		object$betanames_df[-idx_keep, ] %>% with(unique(grpvar)) %>% print()
		message('preserve')
		object$betanames_df[idx_keep, ] %>% with(unique(grpvar)) %>% print()
		}

		effect_keep <- exp(Matrix::crossprod(object$design[idx_keep, ], object$beta[idx_keep, ]))
		object$corrected <- Matrix::t(umi_common * effect_keep + object$epsilon)
		object$corrected <- as(object$corrected, class(object$response)[[1]])

		return(object)
		}



		#' @export
		compute_I2 <- function(B, S) {
		## WARNING: only computes I2 across rows right now
		## TODO: add option for margin, to compute I2 across rows or colummns
		w <- 1 / (S^2)
		beta_fixed <- rowSums(w * B) / rowSums(w)
		Q_stat <- rowSums(w * (B - beta_fixed) ^ 2)
		I2 <- pmax(0, 100 * ((Q_stat - ncol(B) + 1) / Q_stat), na.rm = FALSE) ## if Q=0, I2 cannot be estimated
		return(I2)
		}


		#' @export
		I2.presto <- function(object, effect, within=NULL) {
		## TODO: save results, check if results were already computed in cache. Option to force recompute.
		if (is.null(within)) {
		if (!effect %in% object$priornames_df$grp) {
		stop(sprintf('Model does not contain random effect intercept for term %s', effect))
		}
		idx_use <- object$betanames_df %>%
		tibble::rowid_to_column('idx') %>%
		subset(grpvar == effect) %>%
		with(idx)

		I2 <- matrix(compute_I2(t(object$beta[idx_use, ]), t(object$sigma[idx_use, ])), ncol = 1)
		} else {
		## find interaction term effect that matches both
		## parse that interaction term into 2 effects
		effect_name <- object$priornames_df$grp[match(
		c(paste(within, effect, sep=':'), paste(effect, within, sep=':')),
		object$priornames_df$grp
		)]
		effect_name <- effect_name[!is.na(effect_name)]
		if (length(effect_name) == 0) {
		stop(sprintf('Model does not contain interaction term for %s and %s', effect, within))
		}

		## Now compute heterogeneity within nested group
		X <- object$betanames_df %>%
		tibble::rowid_to_column('idx') %>%
		subset(grpvar == effect_name) %>%
		tidyr::separate(grp, unlist(strsplit(effect_name, ':')), sep = ':')

		I2 <- split(X$idx, X[[within]]) %>% lapply(function(idx_use) {
		compute_I2(t(object$beta[idx_use, ]), t(object$sigma[idx_use, ]))
		}) %>%
		bind_rows() %>%
		as.matrix()
		}

		rownames(I2) <- colnames(object$beta)
		object$I2 <- I2

		## TODO: cache result is dictionary

		return(object)
		}




		# #' @export
		# regress_out_one_gene <- function(formula_full, formula_reduced, design, y, common_nUMI) {
		# tryCatch({
		# glmer_res <- lme4::glmer(
		# formula_full, cbind(design, y), "poisson",
		# control = lme4::glmerControl(calc.derivs = FALSE, optimizer = "nloptwrap")
		# )

		# ## simulate all samples to the same read depth
		# design$logUMI <- log(common_nUMI)
		# ypred <- lme4:::predict.merMod(
		# object = glmer_res,
		# newdata = design,
		# type = 'response',
		# re.form = formula_reduced
		# )
		# yresid <- residuals(glmer_res, type = 'response')
		# return(ypred + exp(yresid))
		# }, error = function(e) {
		# return(rep(NA, length(y)))
		# })
		# }


		# #' @export
		# regress_out.presto <- function(obj, formula_full, formula_reduced, features = NULL, do_par = TRUE, common_nUMI=1e6) {
		# ## TODO: check that formula_reduced only has RHS

		# if (do_par) {
		# plan(multicore)
		# it_fxn <- furrr::future_map
		# }
		# else {
		# it_fxn <- purrr::map
		# }
		# if (is.null(features)) {
		# features <- rownames(obj$counts_mat)
		# }
		# lres <- it_fxn(features, function(feature_use) {
		# regress_out_one_gene(
		# formula_full = formula_full,
		# formula_reduced = formula_reduced,
		# design = obj$meta_data,
		# y = obj$counts_mat[feature_use, ],
		# common_nUMI
		# )
		# })

		# obj$corr_mat <- Reduce(Matrix::rbind2, lres)
		# rownames(obj$corr_mat) <- features
		# colnames(obj$corr_mat) <- colnames(obj$counts_mat)
		# return(obj)
		# }



		# #' @export
		# find_markers_glmm_single_gene <- function(dge_formula, design, y, main_effect, nsim) {
		# ## Estimate model
		# tryCatch({
		# glmer_res <- lme4::glmer(
		# dge_formula, cbind(design, y), 'poisson',
		# control = lme4::glmerControl(
		# calc.derivs=FALSE, ## 1.1X speedup
		# optimizer="nloptwrap" ## 2X speedup
		# )
		# )

		# res <- list()

		# ## save the learned variance components
		# res$var <- as_tibble(lme4::VarCorr(glmer_res)) %>%
		# dplyr::select(grpvar = grp, vcov)
		# ## save all random and fixed effects
		# res$ranef <- lme4::ranef(glmer_res, condVar=TRUE) %>%
		# as.data.frame() %>%
		# dplyr::select(-term)
		# res$fixef <- data.frame(beta = lme4::fixef(glmer_res)) %>%
		# tibble::rownames_to_column('effect')

		# ## get betas and SDs
		# res$dge <- res$ranef %>%
		# subset(grpvar == main_effect) %>%
		# dplyr::select(group = grp, beta=condval) %>%
		# dplyr::mutate(group = as.character(group)) ## undo factors
		# res$dge$sigma <- as.numeric(apply(as.data.frame(arm::sim(glmer_res, n.sims=nsim)@ranef[main_effect]), 2, sd))

		# return(res)
		# }, error = function(e) {
		# return(NA)
		# })
		# }


		# #' @export
		# find_markers_glmm <- function(
		# dge_formula, counts_mat, meta_data, main_effect, features=NULL, do_par=TRUE, nsim=100
		# ) {
		# ## TODO: check that main_effect is in formula
		# if (do_par) {
		# plan(multicore)
		# it_fxn <- furrr::future_map
		# } else {
		# it_fxn <- purrr::map
		# }
		# if (is.null(features)) {
		# features <- rownames(counts_mat)
		# }

		# ## Get results for each gene
		# lres <- it_fxn(features, function(feature_use) {
		# find_markers_glmm_single_gene(
		# dge_formula = dge_formula,
		# design = meta_data,
		# y = counts_mat[feature_use, ],
		# main_effect,
		# nsim
		# )
		# })
		# names(lres) <- features

		# ## in case some glmer models failed
		# lres <- lres[!is.na(lres)]

		# # Then aggregate the three lists together
		# common_el <- purrr::reduce(map(lres, names), intersect)
		# res <- map(common_el, function(name) {
		# map_dfr(lres, name, .id='feature')
		# })
		# names(res) <- common_el
		# return(res)

		# }

R/metaanalysis.R

+217 −217

File changed.

Preview size limit exceeded, changes collapsed.

Admin message