Commit be3df337 authored by HaojiaWu's avatar HaojiaWu
Browse files

more updates

parent af86621d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ Imports:
    circlize,
    dplyr,
    ggplot2,
    ggh4x,
    MASS,
    scales,
    progress,
@@ -46,6 +47,7 @@ Depends:
    circlize,
    dplyr,
    ggplot2,
    ggh4x,
    MASS,
    scales,
    progress,
+41 −45
Original line number Diff line number Diff line
@@ -6,13 +6,15 @@
#' This function can be used for plotting a single gene expression across 
#' different groups in a study with complex group design.
#'
#' @param seu_obj A complete Seurat object
#' @param seu_obj A complete Seurat object.
#' @param feature Gene name. Only one gene is allowed.
#' @param celltypes Cell types to be included in the dot plot. Default: all cell types.
#' @param groupby The group to show on x axis. One of the column names in meta.data.
#' @param groups The group to show on x axis. One of the column names in meta.data.
#' @param splitby The group to separate the gene expression. One of the column names in meta.data.
#' @param scale.by Methods to scale the dot size. "radius" or "size"
#' @param strip.color Colors for the strip background
#' @param scale.by Methods to scale the dot size. "radius" or "size".
#' @param color.palette Color for gene expression.
#' @param strip.color Colors for the strip background.
#' @param font.size Font size for the labels.
#' @param do.scale Whether or not to scale the dot when percentage expression of the gene is less than 20.
#' @return A ggplot object
#' @export
@@ -20,81 +22,75 @@ complex_dotplot_single <- function(
  seu_obj, 
  feature, 
  celltypes=NULL,
  groupby,
  groups,
  splitby=NULL,
  color.palette = NULL,
  font.size = 12,
  strip.color=NULL,
  do.scale=T,
  scale.by='radius'
){
  groupby_level<-levels(seu_obj@meta.data[,groupby])
  if (is.null(groupby_level)){
    seu_obj@meta.data[,groupby] <-factor(seu_obj@meta.data[,groupby], levels = names(table(seu_obj@meta.data[,groupby])))
    groupby_level<-levels(seu_obj@meta.data[,groupby])
  } 
  if (sum(grepl("_", groupby_level))>0){
    seu_obj@meta.data[,groupby]<-gsub("_","-",seu_obj@meta.data[,groupby])
    groupby_level<-gsub("_","-",groupby_level)
    seu_obj@meta.data[,groupby] <-factor(seu_obj@meta.data[,groupby], levels = groupby_level)
  groups_level<-levels(seu_obj@meta.data[,groups])
  if (is.null(groups_level)){
    seu_obj@meta.data[,groups] <-factor(seu_obj@meta.data[,groups], levels = names(table(seu_obj@meta.data[,groups])))
    groups_level<-levels(seu_obj@meta.data[,groups])
  } 
  if(is.null(celltypes)){
    celltypes<-levels(seu_obj)
  }
  seu_obj<-subset(seu_obj,idents=celltypes)
  celltypes<-gsub("_", "-", celltypes)
  seu_obj@meta.data$celltype<-as.character(seu_obj@active.ident)
  seu_obj@meta.data$celltype<-gsub("_", "-", seu_obj@meta.data$celltype)
  seu_obj<-SetIdent(seu_obj, value='celltype')
  levels(seu_obj)<-celltypes
  if(is.null(color.palette)){
    color.palette <- colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255)
  }
  if(!is.null(splitby)){
    if (is.null(levels(seu_obj@meta.data[,splitby]))){
      seu_obj@meta.data[,splitby] <-factor(seu_obj@meta.data[,splitby], levels = names(table(seu_obj@meta.data[,splitby])))
    }
    splitby_level<-levels(seu_obj@meta.data[,splitby])
    count_df<-extract_gene_count(seu_obj, features = feature, meta.groups = c(groupby,splitby))
    count_df$new_group<-paste(count_df[,groupby], count_df[,"celltype"], count_df[,splitby],sep = "___")
    count_df<-extract_gene_count(seu_obj, features = feature, cell.types = celltypes, meta.groups = c(groups,splitby))
    count_df$new_group<-paste(count_df[,groups], count_df[,"celltype"], count_df[,splitby],sep = "___")
    exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))}) 
    pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)})
    pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)}) #This is the same data processing as Seurat
    colnames(exp_df)[2]<-"avg.exp"
    colnames(pct_df)[2]<-"pct.exp"
    data_plot<-merge(exp_df, pct_df, by='new_group')
    data_plot$groupby <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
    data_plot$groups <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
    data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[2]]}))
    data_plot$splitby <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[3]]}))
    data_plot$groupby <- factor(data_plot$groupby, levels = groupby_level)
    data_plot$groups <- factor(data_plot$groups, levels = groups_level)
    data_plot$splitby <- factor(data_plot$splitby, levels = splitby_level)
    data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
  } else {
  count_df<-extract_gene_count(seu_obj, features = feature, meta.groups = groupby)
  count_df$new_group<-paste(count_df[,groupby], count_df[,"celltype"],sep = "_")
  count_df<-extract_gene_count(seu_obj, features = feature, cell.types = celltypes, meta.groups = groups)
  count_df$new_group<-paste(count_df[,groups], count_df[,"celltype"],sep = "___")
  exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))})
  pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)})
  colnames(exp_df)[2]<-"avg.exp"
  colnames(pct_df)[2]<-"pct.exp"
  data_plot<-merge(exp_df, pct_df, by='new_group')
  data_plot$groupby <- as.character(lapply(X=strsplit(data_plot$new_group, split = "_"),FUN = function(x){x[[1]]}))
  data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "_"),FUN = function(x){x[[2]]}))
  data_plot$groupby <- factor(data_plot$groupby, levels = groupby_level)
  data_plot$celltype <- factor(data_plot$celltype, levels = celltypes)
  data_plot$groups <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
  data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[2]]}))
  data_plot$groups <- factor(data_plot$groups, levels = groups_level)
  data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
  }
  scale.func <- switch(
    EXPR = scale.by,
    'size' = scale_size,
    'radius' = scale_radius,
    stop("'scale.by' must be either 'size' or 'radius'")
  )
  ) ### This function is from Seurat https://github.com/satijalab/seurat
  data_plot$pct.exp <- round(100 * data_plot$pct.exp, 2)
  data_plot$avg.exp <- scale(data_plot$avg.exp)
  p<-ggplot(data_plot, aes(y = celltype, x = groupby)) +  
  p<-ggplot(data_plot, aes(y = celltype, x = groups)) +  
    geom_tile(fill="white", color="white") +
    geom_point(aes( colour=avg.exp, size =pct.exp))  +  
    scale_color_gradientn(colours  =  colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255))+ 
    scale_color_gradientn(colours  =  color.palette )+ 
    theme(panel.background = element_rect(fill = "white", colour = "black"),
          axis.text.x = element_text(angle = 45, hjust = 1),
          plot.title = element_text(size = 16,hjust = 0.5, face = 'bold'),
          axis.text = element_text(size = 12),
          axis.title=element_text(size=8),
          legend.text=element_text(size=8),
          legend.title = element_text(size = 12),
          axis.text.x = element_text(angle = 45, hjust = 1, size = font.size),
          plot.title = element_text(size = (font.size +2), hjust = 0.5, face = 'bold'),
          axis.text = element_text(size = font.size),
          legend.text=element_text(size=(font.size-2)),
          legend.title = element_text(size = (font.size)),
          strip.text = element_text( size = font.size),
          legend.position="right")+
    ylab("")+xlab("")+ggtitle(feature)
  if(do.scale){
@@ -126,7 +122,7 @@ complex_dotplot_single <- function(
#' @param seu_obj A complete Seurat object
#' @param features A vector of gene names.
#' @param celltypes Cell types to be included in the dot plot. Default: all cell types.
#' @param groupby Group ID Must be one of the column names in the meta.data slot of the Seurat object.
#' @param groups Group ID Must be one of the column names in the meta.data slot of the Seurat object.
#' @param strip.color Colors for the strip background
#' @return A ggplot object
#' @export
@@ -134,7 +130,7 @@ complex_dotplot_multiple <- function(
  seu_obj, 
  features, 
  celltypes=NULL,
  groupby, 
  groups, 
  strip.color = NULL
  ){
 pb <- progress_bar$new(
@@ -143,7 +139,7 @@ complex_dotplot_multiple <- function(
 plot_list<-list()
 for(i in 1:length(features)){
  pp<-invisible(
    complex_dotplot_single(seu_obj = seu_obj, feature = features[i], groupby = groupby, celltypes = celltypes)
    complex_dotplot_single(seu_obj = seu_obj, feature = features[i], groups = groups, celltypes = celltypes)
  )
  pp<-pp$data
  pp$gene <- features[i]
@@ -155,7 +151,7 @@ complex_dotplot_multiple <- function(
  all_data$gene<-factor(all_data$gene, levels = rev(features)) 
  all_data$celltype <- factor(all_data$celltype, levels = levels(seu_obj))
  p <- invisible(
    ggplot(all_data, aes(x = groupby, y = gene)) +  
    ggplot(all_data, aes(x = groups, y = gene)) +  
    geom_tile(fill="white", color="white") +
    geom_point(aes( colour=avg.exp, size =pct.exp), alpha=0.9)  +  
    scale_color_gradientn(colours  =  grDevices::colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255))+ 
+57 −12
Original line number Diff line number Diff line
@@ -49,7 +49,6 @@ complex_vlnplot_single <- function(
    gene_count[,allgroups[i]]<-factor(gene_count[,allgroups[i]],
                                      levels = group_level)
  }
  max_exp<-max(gene_count[,feature])
  set.seed(seed = 42)
  noise <- rnorm(n = length(x = gene_count[,feature])) / 100000 ## This follows the same data processing for VlnPlot in Seurat
  gene_count[, feature]<-gene_count[,feature]+noise
@@ -60,14 +59,14 @@ complex_vlnplot_single <- function(
      xlab("") +
      ylab("") +
      ggtitle(feature)+
      theme_bw() +
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
      theme(panel.background = element_rect(fill = "white",colour = "black"),
            axis.title = element_text(size = font.size), 
            axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
            axis.text.y = element_text(size=(font.size-2)),
            strip.text = element_text( size = font.size),
            axis.text.x = element_text(size=(font.size-2), angle = 45, hjust = 1, vjust = 1),
            axis.title.y = element_text(size = font.size),
            plot.title = element_text(size=font.size, face = "bold", hjust = 0.5),
            legend.position = "none") 
            legend.title = element_blank(),
            legend.position = 'none',
            plot.title = element_text(size=(font.size),face = "bold", hjust = 0.5))
    if(add.dot){
      p = p + geom_quasirandom(size=pt.size, alpha=0.2)
    }
@@ -82,9 +81,7 @@ complex_vlnplot_single <- function(
    }
    
  } else {
    if(!is.null(splitby)){
      stop("This function does not support spliting multiple groups. Plots will look too messy! Please select one group only in the 'groups' parameter if you want to use 'splitby'.")
    }
    if(is.null(splitby)){
    all_levels<-list()
    for(i in 1:length(groups)){
      if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
@@ -114,6 +111,55 @@ complex_vlnplot_single <- function(
    }
    g <- change_strip_background(p, type = 'both',  strip.color = strip.color)
    print(grid::grid.draw(g))
    } else {
      count_list<-list()
      for(i in 1:length(groups)){
        df1<-gene_count[, c(groups[i],splitby[i],feature, "celltype")]
        colnames(df1)[1:2]<-c("group","split")
        df1$new_group<-groups[i]
        count_list[[i]]<-df1
      }
      new_count<-do.call("rbind", count_list)
      new_count$celltype<-factor(new_count$celltype, levels = celltypes)
      group_level<-list()
      for(i in 1:length(groups)){
        if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
          seu_obj@meta.data[,groups[i]] <-factor(seu_obj@meta.data[,groups[i]], levels = names(table(seu_obj@meta.data[,groups[i]])))
        }
        group_level[[i]]<-levels(seu_obj@meta.data[,groups[i]])
      }
      group_level<-as.character(unlist(group_level))
      new_count$group<-factor(new_count$group, levels=group_level)
      fill_x1<-grDevices::rainbow(length(groups), alpha = 0.5)
      fill_x2<-list()
      for(i in 1:length(splitby)){
        n_col<-unique(gene_count[, splitby[i]])
        fill_x2[[i]]<-scales::hue_pal(l=90)(length(n_col))
      }
      fill_x2<-as.character(unlist(fill_x2))
      fill_x <- c(fill_x1, fill_x2)
      fill_y <- scales::hue_pal(l=90)(length(celltypes))
      p<- ggplot(new_count, aes_string(x="group", y=feature, fill="group"))+
        geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=alpha, color="pink")+
        xlab("") + ylab("") + ggtitle(feature) +
        theme(panel.background = element_rect(fill = "white",colour = "black"),
              axis.title = element_text(size = font.size), 
              axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
              axis.text.y = element_text(size=(font.size-2)),
              strip.text = element_text( size = font.size),
              legend.title = element_blank(),
              legend.position = 'none',
              plot.title = element_text(size=(font.size),face = "bold", hjust = 0.5)) +
        facet_nested(celltype ~ new_group + split, scales = "free_x", 
                     strip = strip_nested( background_x = elem_list_rect(fill = fill_x),
                                           background_y = elem_list_rect(fill = fill_y)))
      if(add.dot){
        p = p + geom_quasirandom(size=pt.size, alpha=0.2)
        print(p)
      } else {
        p
      }
    }
  }
}

@@ -162,7 +208,6 @@ complex_vlnplot_multiple <- function(
  group_level<-levels(seu_obj@meta.data[,group])
  gene_count[,group]<-factor(gene_count[,group],levels = group_level)
  for(i in 1:length(features)){
    max_exp<-max(gene_count[,features[i]])
    set.seed(seed = 42)
    noise <- rnorm(n = length(x = gene_count[,features[i]])) / 100000 ## This follows the same data processing for VlnPlot in Seurat
    gene_count[, features[i]]<-gene_count[,features[i]]+noise
+20 −3
Original line number Diff line number Diff line
@@ -90,17 +90,34 @@ dev.off()
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_single_split.png) <br />
#### One gene/multiple group factors violin plot:
```
png(filename =  'vlnplot_multiple.png', width = 6, height = 6,units = 'in', res = 200)
png(filename =  'vlnplot_multiple.png', width = 6, height = 6,units = 'in', res = 100)
complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = c("Group","Replicates"),celltypes   = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), font.size = 10)
dev.off()
```
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple.png) <br />

Note that the Replicates group here is for demo purpose. This is not the mouse ID as reported in our original paper.
Each group factor can be further splitted by its own factor by setting the splitby argument. Note that in this case, the order of the group factors needs to match the order of splitby factors. For example:
```
iri.integrated@meta.data$ReplicateID<-plyr::mapvalues(iri.integrated@meta.data$Replicates, from = names(table((iri.integrated@meta.data$Replicates))), to = c(rep("Rep1",3),rep("Rep2",3), rep("Rep3",1)))
iri.integrated@meta.data$ReplicateID<-as.character(iri.integrated@meta.data$ReplicateID)

png(filename =  'vlnplot_multiple_split.png', width = 7, height = 5,units = 'in', res = 200)
complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = c("Group","Replicates"),
                        celltypes   = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), 
                        font.size = 10, splitby = c("Phase","ReplicateID"), pt.size=0.05)
dev.off()

### In this example, "Phase" is a splitby factor for "Group" and "ReplicateID" is a splitby factor for "Replicates".

```

![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple_split.png) <br />

Note that the Replicates group here is just for showcase purpose. This is not a meaning group ID in our snRNA-seq dataset.

#### Multiple genes/one group factor violin plot:
```
png(filename =  'vlnplot_multiple_genes.png', width = 8, height = 6,units = 'in', res = 300)
png(filename =  'vlnplot_multiple_genes.png', width = 6, height = 6,units = 'in', res = 300)
complex_vlnplot_multiple(iri.integrated, features = c("Havcr1",  "Slc34a1", "Vcam1",   "Krt20"  , "Slc7a13", "Slc5a12"), celltypes = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), group = "Group", add.dot=T, pt.size=0.01, alpha=0.01, font.size = 10)
dev.off()
```
+166 KiB
Loading image diff...
Loading