Commit e09b2bdb authored by smorabit's avatar smorabit
Browse files

Update to ModuleConnectivity

parent a0c23253
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
Package: hdWGCNA
Title: hdWGCNA
Version: 0.2.04
Version: 0.2.1
Authors@R: c(
    person("Sam", "Morabito", , "smorabit@uci.edu", role = c("aut", "cre"),
           comment = c(ORCID = "0000-0002-7768-4856")),
@@ -13,3 +13,4 @@ LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
URL: https://smorabit.github.io/hdWGCNA/
Imports: WGCNA, Seurat
+2 −14
Original line number Diff line number Diff line
# Generated by roxygen2: do not edit by hand

export(AvgModuleExpr)
export(ComputeModuleEigengene)
export(ComputeROC)
export(ConstructMetacells)
export(ConstructMetaspots)
export(ConstructNetwork)
export(DimPlotMetacells)
export(DoHubGeneHeatmap)
export(EnrichrBarPlot)
export(EnrichrDotPlot)
@@ -42,11 +38,8 @@ export(GetWGCNAParams)
export(HubGeneNetworkPlot)
export(MetacellsByGroups)
export(MetaspotsByGroups)
export(ModuleConnectivity)
export(ModuleCorrNetwork)
export(ModuleCorrelogram)
export(ModuleEigengenes)
export(ModuleExprScore)
export(ModuleFeaturePlot)
export(ModuleNetworkPlot)
export(ModulePreservation)
@@ -56,7 +49,6 @@ export(ModuleUMAPPlot)
export(MotifOverlapBarPlot)
export(MotifScan)
export(MotifTargetScore)
export(NormalizeMetacells)
export(OverlapBarPlot)
export(OverlapDotPlot)
export(OverlapModulesDEGs)
@@ -72,11 +64,7 @@ export(ROCCurves)
export(ResetModuleColors)
export(ResetModuleNames)
export(RunEnrichr)
export(RunHarmonyMetacells)
export(RunModuleUMAP)
export(RunPCAMetacells)
export(RunUMAPMetacells)
export(ScaleMetacells)
export(SelectNetworkGenes)
export(SetActiveWGCNA)
export(SetAvgModuleExpr)
@@ -107,8 +95,8 @@ export(SetupForWGCNA)
export(TestSoftPowers)
export(TestSoftPowersConsensus)
export(TransferModuleGenome)
export(construct_metacells)
export(metacells_by_groups)
export(scale01)
export(shuffle_points)
export(umap_theme)
import(Seurat)
import(WGCNA)
+8 −0
Original line number Diff line number Diff line
# hdWGCNA 0.2.1 (2023-01-24)
## Added
- `ReassignModules` function.

## Changes
- New option in `ModuleConnectivity` to use `corSparse` to compute the correlation, which greatly reduces runtime and memory usage.
- New option in `ModuleConnectivity` to automatically reassign features to different modules if kME is negative.

# hdWGCNA 0.2.03 (2022-12-15)
## Added
- None

R/ConstructNetwork.R

0 → 100644
+178 −0
Original line number Diff line number Diff line
#' ConstructNetwork
#'
#' @description Constructs a co-expression network and groups genes into modules
#' given a Seurat object that has been prepared for network analysis.
#'
#' @return seurat_obj with the co-expression network and gene modules computed for the selected wgcna experiment
#'
#' @param seurat_obj A Seurat object
#' @param soft_power the soft power used for network construction. Automatically selected by default.
#' @param min_power the smallest soft power to be selected if soft_power=NULL
#' @param tom_outdir path to the directory where the TOM will be written
#' @param tom_name prefix name given to the TOM output file
#' @param consensus flag indicating whether or not to perform Consensus network analysis
#' @param wgcna_name name of the WGCNA experiment
#' @param ... additional parameters passed to blockwiseConsensusModules
#'
#' @details
#' ConstructNetwork builds a co-expression network and identifies clusters of
#' highly co-expressed genes (modules) from the metacell or metaspot
#' expression matrix stored in the Seurat object. Before running this function,
#' the following functions must be run on the input Seurat object:
#'
#' 1. SetupForWGCNA
#' 2. MetacellsByGroups or MetaspotsByGroups, and NormalizeMetacells
#' 3. SetDatExpr or SetMultiExpr
#' 4. TestSoftPowers or TestSoftPowersConsensus
#'
#' This function can also be used to perform consensus network analysis if consensus=TRUE
#' is selected. ConstructNetwork calls the WGCNA function blockwiseConsensusModules
#' to compute the adjacency matrix, topological overlap matrix, and to run the
#' Dynamic Tree Cut algorithm to identify gene modules. blockwiseConsensusModules
#' has numerous parameters but here we have selected default parameters that we
#' have found to provide reasonable results on a variety of single-cell and
#' spatial transcriptomic datasets.
#'
#' @import WGCNA
#' @import Seurat
ConstructNetwork <- function(
  seurat_obj, soft_power=NULL, min_power=3,
  tom_outdir="TOM",
  tom_name = NULL,
  consensus = FALSE,
  overwrite_tom = FALSE,
  wgcna_name = NULL,
  blocks=NULL, maxBlockSize=30000, randomSeed=12345, corType="pearson",
  consensusQuantile=0.3, networkType = "signed", TOMType = "signed",
  TOMDenom = "min", scaleTOMs = TRUE, scaleQuantile = 0.8,
  sampleForScaling = TRUE, sampleForScalingFactor = 1000,
  useDiskCache = TRUE, chunkSize = NULL,
  deepSplit = 4, pamStage=FALSE, detectCutHeight = 0.995, minModuleSize = 50,
  mergeCutHeight = 0.2, saveConsensusTOMs = TRUE, ...
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # suffix for the tom
  if(is.null(tom_name)){
    tom_name <- gsub(' ', '_', wgcna_name)
  }

  # tom file name
  renamed <- paste0(tom_outdir, '/', tom_name, '_TOM.rda')
  if(file.exists(renamed)){
    if(overwrite_tom){
      warning(paste0('Overwriting TOM ', renamed))
    } else{
      stop(paste0("TOM ", renamed, " already exists. Set overwrite_tom = TRUE or change tom_name to proceed."))
    }
  }

  # constructing network on multiple datasets (consensus WGCNA)
  if(consensus){

    multiExpr <- GetMultiExpr(seurat_obj)
    checkSets(multiExpr) # check data size

  # constructing network on a single dataset
  } else{

    # get datExpr from seurat object
    datExpr <- GetDatExpr(seurat_obj)

    if(is.null(wgcna_name)){
      wgcna_name <- 'all'
    }

    nSets = 1
    setLabels = gsub(' ', '_', wgcna_name)
    shortLabels = setLabels
    multiExpr <- list()
    multiExpr[[wgcna_name]] <- list(data=datExpr)
    checkSets(multiExpr) # check data size
  }

  # make output dir for the TOM
  if(!dir.exists(tom_outdir)){
    dir.create(tom_outdir)
  }

  if(is.null(soft_power) & !consensus){
    soft_power <- GetPowerTable(seurat_obj) %>% subset(SFT.R.sq >= 0.8 & Power > min_power) %>% .$Power %>% min
    cat(paste0("Soft power not provided. Automatically using the lowest power that meets 0.8 scale-free topology fit. Using soft_power = ", soft_power, "\n"))
  } else if(is.null(soft_power)){
    power_tables <- GetPowerTable(seurat_obj) %>% dplyr::group_split(group)
    soft_power <- sapply(power_tables, function(power_table){
      power_table %>% subset(SFT.R.sq >= 0.8 & Power > min_power) %>% .$Power %>% min
    })
    cat(paste0("Soft power not provided. Automatically using the lowest power that meets 0.8 scale-free topology fit. Using soft_power = c(", paste0(soft_power, collapse=','), ")\n"))
  }

  # construct the network
  net <- WGCNA::blockwiseConsensusModules(
    multiExpr,
    power = soft_power,
    blocks = blocks,
    maxBlockSize = maxBlockSize, ## This should be set to a smaller size if the user has limited RAM
    randomSeed = randomSeed,
    corType = corType,
    consensusQuantile = consensusQuantile,
    networkType = networkType,
    TOMType = TOMType,
    TOMDenom = TOMDenom,
    scaleTOMs = scaleTOMs, scaleQuantile = scaleQuantile,
    sampleForScaling = sampleForScaling, sampleForScalingFactor = sampleForScalingFactor,
    useDiskCache = useDiskCache, chunkSize = chunkSize,
    deepSplit = deepSplit,
    pamStage=pamStage,
    detectCutHeight = detectCutHeight, minModuleSize = minModuleSize,
    mergeCutHeight = mergeCutHeight,
    saveConsensusTOMs = saveConsensusTOMs,
    consensusTOMFilePattern = "ConsensusTOM-block.%b.rda", ...)

  # rename consensusTOM file:
  file.rename('ConsensusTOM-block.1.rda', renamed)

  # add network parameters to the Seurat object:
  params <- list(
    power = soft_power,
    blocks = blocks,
    maxBlockSize = maxBlockSize, ## This should be set to a smaller size if the user has limited RAM
    randomSeed = randomSeed,
    corType = corType,
    consensusQuantile = consensusQuantile,
    networkType = networkType,
    TOMType = TOMType,
    TOMDenom = TOMDenom,
    scaleTOMs = scaleTOMs, scaleQuantile = scaleQuantile,
    sampleForScaling = sampleForScaling, sampleForScalingFactor = sampleForScalingFactor,
    useDiskCache = useDiskCache, chunkSize = chunkSize,
    deepSplit = deepSplit,
    pamStage=pamStage,
    detectCutHeight = detectCutHeight, minModuleSize = minModuleSize,
    mergeCutHeight = mergeCutHeight,
    saveConsensusTOMs = saveConsensusTOMs
  )

  # add parameters:
  seurat_obj <- SetWGCNAParams(seurat_obj, params)

  # append working directory to the TOM file so it has the full path:
  net$TOMFiles <- paste0(getwd(), '/', renamed)

  # add network to seurat obj
  seurat_obj <- SetNetworkData(seurat_obj, net, wgcna_name)

  # set the modules df in the Seurat object
  mods <- GetNetworkData(seurat_obj)$colors
  seurat_obj <- SetModules(
    seurat_obj, mod_df = data.frame(
      "gene_name" = names(mods),
      "module" = factor(mods, levels=unique(mods)),
      "color" = mods
    )
  )

  seurat_obj

}

R/DMEs.R

0 → 100644
+183 −0
Original line number Diff line number Diff line

#' FindAllDMEs
#'
#' Modification of the Seurat function FindMarkers used to perform iterative one-versus-all differential module eigengene testing given a group of cells (ie clusters, cell types, etc).
#'
#' @param seurat_obj A Seurat object
#' @param group.by column in seurat_obj@meta.data containing cell grouping information
#' @param harmonized logical determining whether or not to use harmonized MEs
#' @param wgcna_name The name of the hdWGCNA experiment in the seurat_obj@misc slot
#' @param add_missing logical determining whether or not to add missing modules back into the resulting dataframe with NA values.
#' @param ... Additional parameters for the FindMarkers function
#' @keywords scRNA-seq
#' @export
#' @return A dataframe contaning differential ME results
#' @examples
#' FindAllDMEs
FindAllDMEs <- function(
  seurat_obj,
  group.by,
  harmonized=TRUE,
  wgcna_name=NULL,
  add_missing=FALSE,
  test.use='wilcox',
  only.pos=FALSE,
  logfc.threshold = 0,
  min.pct=0,
  verbose=FALSE,
  pseudocount.use=0,
  ...
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # get list of groups
  groups <- as.character(unique(seurat_obj@meta.data[[group.by]]))

  # get module eigengenes, remove grey module
  MEs <- GetMEs(seurat_obj, harmonized, wgcna_name)
  MEs <- MEs[, colnames(MEs) != 'grey']

  # set all negative values to zero
  MEs[MEs < 0] <- 0

  # transpose MEs so cells are columns and rows are MEs
  MEs <- t(MEs)

  # create new assay for MEs:
  ME_assay <- Seurat::CreateAssayObject(MEs)

  DMEs_list <- list()
  for(cur_group in groups){

    print(cur_group)

    barcodes1 <- colnames(seurat_obj)[seurat_obj@meta.data[[group.by]] == cur_group]
    barcodes2 <- colnames(seurat_obj)[seurat_obj@meta.data[[group.by]] != cur_group]

    # run differential test on the ME assay
    DMEs <- FindMarkers(
      ME_assay,
      cells.1 = barcodes1,
      cells.2 = barcodes2,
      slot='counts',
      test.use=test.use,
      only.pos=only.pos,
      logfc.threshold=logfc.threshold,
      min.pct=min.pct,
      verbose=verbose,
      pseudocount.use=pseudocount.use,
      ...
    )
    DMEs$module <- rownames(DMEs)
    DMEs$group <- cur_group

    # add missing modules if specified
    if(add_missing){
      missing_mods <- rownames(MEs)[!(rownames(MEs) %in% DMEs$module)]
      for(cur_mod in missing_mods){
        DMEs[cur_mod,] <- NA
        DMEs[cur_mod,'module'] <- cur_mod
      }
    }

    # add to ongoing list:
    DMEs_list[[cur_group]] <- DMEs

  }

  DMEs <- do.call(rbind, DMEs_list)
  DMEs
}


#' FindDMEs
#'
#' Modification of the Seurat function FindMarkers used to perform differential module eigengene testing between two sets of cell barcodes.
#'
#' @param seurat_obj A Seurat object
#' @param barcodes1 character vector containing cell barcodes for the first group to test. Positive fold-change means up-regulated in this group.
#' @param barcodes2 character vector containing cell barcodes for the second group to test. Negative fold-change means up-regulated in this group.
#' @param harmonized logical determining whether or not to use harmonized MEs
#' @param wgcna_name The name of the hdWGCNA experiment in the seurat_obj@misc slot
#' @param add_missing logical determining whether or not to add missing modules back into the resulting dataframe with NA values.
#' @param ... Additional parameters for the FindMarkers function
#' @keywords scRNA-seq
#' @export
#' @return A dataframe contaning differential ME results
#' @examples
#' FindDMEs
FindDMEs <- function(
  seurat_obj,
  barcodes1,
  barcodes2,
  harmonized=TRUE,
  wgcna_name=NULL,
  add_missing=FALSE,
  test.use='wilcox',
  only.pos=FALSE,
  logfc.threshold = 0,
  min.pct=0,
  verbose=FALSE,
  pseudocount.use=0,
  ...
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # ensure that selected barcodes are in the seurat obj
  if(!(all(barcodes1 %in% colnames(seurat_obj)))){
    stop('Some barcodes in barcodes1 not found in colnames(seurat_obj).')
  }
  if(!(all(barcodes2 %in% colnames(seurat_obj)))){
    stop('Some barcodes in barcodes2 not found in colnames(seurat_obj).')
  }

  # check for overlap in the two groups of bacrodes:
  if(length(intersect(barcodes1, barcodes2)) > 0){
    stop('Some barcodes overlap in barcodes1 and barcodes2')
  }

  # get module eigengenes, remove grey module
  MEs <- GetMEs(seurat_obj, harmonized, wgcna_name)
  MEs <- MEs[, colnames(MEs) != 'grey']
  print(dim(MEs))
  print(colnames(MEs))

  # set all negative values to zero
  MEs[MEs < 0] <- 0

  # transpose MEs so cells are columns and rows are MEs
  MEs <- t(MEs)

  # create new assay for MEs:
  ME_assay <- Seurat::CreateAssayObject(MEs)

  # run differential test on the ME assay
  DMEs <- FindMarkers(
    ME_assay,
    cells.1 = barcodes1,
    cells.2 = barcodes2,
    slot='counts',
    test.use=test.use,
    only.pos=only.pos,
    logfc.threshold=logfc.threshold,
    min.pct=min.pct,
    verbose=verbose,
    pseudocount.use=pseudocount.use,
    ...
  )
  DMEs$module <- rownames(DMEs)

  # add missing modules if specified
  if(add_missing){
    missing_mods <- rownames(MEs)[!(rownames(MEs) %in% DMEs$module)]
    for(cur_mod in missing_mods){
      DMEs[cur_mod,] <- NA
      DMEs[cur_mod,'module'] <- cur_mod
    }
  }

  DMEs

}
Loading