Commit 064d2915 authored by smorabit's avatar smorabit
Browse files

additional arguments for MetacellsByGroups

parent 8337a343
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
Package: hdWGCNA
Title: hdWGCNA
Version: 0.1.1.9004
Version: 0.1.1.9005
Authors@R: c(
    person("Sam", "Morabito", , "smorabit@uci.edu", role = c("aut", "cre"),
           comment = c(ORCID = "0000-0002-7768-4856")),
+7 −0
Original line number Diff line number Diff line
# hdWGCNA 0.1.1.9005 (2022-06-17)
## Added
- None

## Changes
- Added new arguments to `MetacellsByGroups` and `ConstructMetacells` to exclude very small groups (`min_cells`), to reach a target number of metacells (`target_metacells`), and to exclude metacells with too much overlap (`max_shared`).

# hdWGCNA 0.1.1.9004 (2022-06-13)
## Added
- None
+10 −2
Original line number Diff line number Diff line
@@ -184,6 +184,7 @@ SetDatExpr <- function(
  multi_group_name = NULL,
  return_seurat = TRUE,
  wgcna_name=NULL,
  assay=NULL,
  slot = 'data',
  ...
){
@@ -195,7 +196,10 @@ SetDatExpr <- function(
  params <- GetWGCNAParams(seurat_obj, wgcna_name)
  genes_use <- GetWGCNAGenes(seurat_obj, wgcna_name)
  modules <- GetModules(seurat_obj, wgcna_name)

  if(is.null(assay)){
    assay <- params$metacell_assay
  }

  # use metacells or whole seurat object?
  if(use_metacells){
@@ -204,6 +208,11 @@ SetDatExpr <- function(
    s_obj <- seurat_obj
  }

  # check the assay:
  if(!(assay %in% names(s_obj@assays))){
    stop("Assay not found. Check names(seurat_obj@assays) or names(GetMetacellObject(seurat_obj)@assays)")
  }

  # check that group.by is in the Seurat object & in the metacell object:
  if(!(group.by %in% colnames(s_obj@meta.data))){
    m_cell_message <- ""
@@ -221,7 +230,6 @@ SetDatExpr <- function(

  # check that the group names are actually in the group.by column:


  # subset further if multiExpr:
  if(!is.null(multi.group.by)){
    seurat_meta <- seurat_meta %>% subset(get(multi.group.by) %in% multi_group_name)
+37 −13
Original line number Diff line number Diff line
@@ -11,6 +11,9 @@
#' @param return_metacell Logical determining whether to return the metacell Seurat object (TRUE) or add it to the misc slot of the original Seurat object (FALSE). Defaults to FALSE.
#' @param mode determines how to make gene expression profiles for metacells from their constituent single cells. Options are "average" or "sum".
#' @param max_shared the maximum number of cells to be shared across two metacells
#' @param target_metacells the maximum target number of metacells to construct
#' @param max_iter the maximum number of iterations in the metacells bootstrapping loop
#' @param verbose logical indicating whether to print additional information
#' @param wgcna_name name of the WGCNA experiment
#' @keywords scRNA-seq
@@ -18,11 +21,15 @@
#' @examples
#' ConstructMetacells
ConstructMetacells <- function(
  seurat_obj, name='agg', ident.group='seurat_clusters', k=50,
  seurat_obj, name='agg', ident.group='seurat_clusters', k=25,
  reduction='umap', assay='RNA',
  cells.use = NULL, # if we don't want to use all the cells to make metacells, good for train/test split
  slot='counts',  meta=NULL, return_metacell=FALSE,
  mode = 'average', max_shared=10, verbose=FALSE, wgcna_name=NULL
  mode = 'average', max_shared=15,
  target_metacells=1000,
  max_iter=5000,
  verbose=FALSE,
  wgcna_name=NULL
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}
@@ -63,7 +70,7 @@ ConstructMetacells <- function(
  get_shared <- function(other, this_choice) {
      k2 - length(union(cell_sample[other, ], this_choice))
  }
  while (length(good_choices) > 0 & it < 5000) {
  while (length(good_choices) > 0 & length(chosen) < target_metacells & it < max_iter) {
      it <- it + 1
      choice <- sample(seq_len(length(good_choices)), size = 1,
          replace = FALSE)
@@ -74,7 +81,8 @@ ConstructMetacells <- function(
      this_choice <- cell_sample[nrow(cell_sample), ]
      shared <- sapply(others, get_shared, this_choice = this_choice)

      if (max(shared) < 0.9 * k) {
      # if (max(shared) < 0.9 * k) { # old way of doing it
      if(max(shared) <= max_shared){
          chosen <- new_chosen
      }
  }
@@ -101,8 +109,8 @@ ConstructMetacells <- function(
  # groups of cells to combine
  mask <- sapply(seq_len(nrow(cell_sample)), function(x) seq_len(ncol(exprs_old)) %in%
      cell_sample[x, , drop = FALSE])
  mask <- mask[,which(shared_old <= max_shared)]
  cell_sample <- cell_sample[which(shared_old <= max_shared),]
  # mask <- mask[,which(shared_old <= max_shared)]
  # cell_sample <- cell_sample[which(shared_old <= max_shared),]
  mask <- Matrix::Matrix(mask)

  # average or sum expression?
@@ -120,7 +128,7 @@ ConstructMetacells <- function(
  )

  # calculate stats:
  shared <- shared[shared <= max_shared]
  # shared <- shared[shared <= max_shared]
  max_shared <- max(shared)
  median_shared <- median(shared)
  mean_shared <- mean(shared)
@@ -188,11 +196,14 @@ ConstructMetacells <- function(
#' @param group.by A character vector of Seurat metadata column names representing groups for which metacells will be computed.
#' @param k Number of nearest neighbors to aggregate. Default = 25
#' @param name A string appended to resulting metacells. Default = 'agg'
#' @param reduction A dimensionality reduction stored in the Seurat object. Default = 'umap'
#' @param reduction A dimensionality reduction stored in the Seurat object. Default = 'pca'
#' @param assay Assay to extract data for aggregation. Default = 'RNA'
#' @param slot Slot to extract data for aggregation. Default = 'counts'
#' @param mode determines how to make gene expression profiles for metacells from their constituent single cells. Options are "average" or "sum".
#' @param min_cells the minimum number of cells in a particular grouping to construct metacells
#' @param max_shared the maximum number of cells to be shared across two metacells
#' @param target_metacells the maximum target number of metacells to construct
#' @param max_iter the maximum number of iterations in the metacells bootstrapping loop
#' @param verbose logical indicating whether to print additional information
#' @param wgcna_name name of the WGCNA experiment
#' @keywords scRNA-seq
@@ -200,10 +211,14 @@ ConstructMetacells <- function(
#' @examples
#' MetacellsByGroups
MetacellsByGroups <- function(
  seurat_obj, group.by=c('seurat_clusters'), ident.group='seurat_clusters',
  k=50, reduction='umap', assay='RNA',
  seurat_obj, group.by=c('seurat_clusters'),
  ident.group='seurat_clusters',
  k=25, reduction='pca', assay='RNA',
  cells.use = NULL, # if we don't want to use all the cells to make metacells, good for train/test split
  slot='counts', mode = 'average', max_shared=10, verbose=FALSE, wgcna_name=NULL
  slot='counts', mode = 'average', min_cells=100,
  max_shared=15,
  target_metacells=1000,
  max_iter=5000, verbose=FALSE, wgcna_name=NULL
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}
@@ -221,6 +236,11 @@ MetacellsByGroups <- function(
    stop('Invalid choice for mode. Mode can be either sum or average.')
  }

  # check reduction
  if(!(reduction %in% names(seurat_obj@reductions))){
    stop(paste0("Invalid reduction (", reduction, "). Reductions in Seurat object: ", paste(names(seurat_obj@reductions), collapse=', ')))
  }

  # subset seurat object by selected cells:
  if(!is.null(cells.use)){
    seurat_full <- seurat_obj
@@ -242,9 +262,13 @@ MetacellsByGroups <- function(

  # remove groups that are too small:
  # TODO: add a warning to let the user know that some groups are skipped?
  groupings <- groupings[table(seurat_obj$metacell_grouping) >= 2*k]
  groupings <- groupings[table(seurat_obj$metacell_grouping) >= min_cells]
  print(groupings)

  if(length(groupings) == 0 ){
    stop("No groups met the min_cells requirement.")
  }

  # unique meta-data for each group
  meta_df <- as.data.frame(do.call(rbind, strsplit(groupings, '#')))
  colnames(meta_df) <- group.by
@@ -268,7 +292,7 @@ MetacellsByGroups <- function(
    seurat_obj = seurat_list,
    name = groupings,
    meta = meta_list,
    MoreArgs = list(k=k, reduction=reduction, assay=assay, slot=slot, return_metacell=TRUE, mode=mode, max_shared=max_shared, verbose=verbose, wgcna_name=wgcna_name)
    MoreArgs = list(k=k, reduction=reduction, assay=assay, slot=slot, return_metacell=TRUE, mode=mode, max_shared=max_shared, max_iter=max_iter, target_metacells=target_metacells, verbose=verbose, wgcna_name=wgcna_name)
  )
  names(metacell_list) <- groupings

+1 −1
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="https://smorabit.github.io/scWGCNA/index.html">hdWGCNA</a>
        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.1.1.9004</span>
        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.1.1.9005</span>
      </span>
    </div>

Loading