Commit f2b38346 authored by HaojiaWu's avatar HaojiaWu
Browse files

fix violinplot

parent d1eaed16
Loading
Loading
Loading
Loading
+74 −142
Original line number Diff line number Diff line
@@ -7,18 +7,20 @@
#' different groups or cell types. It is designed for visualizing a complicated scenario: 
#' Gene expression on multiple cell types and multiple conditions.
#'
#' @param seu_obj A complete Seurat object
#' @param seu_obj A complete Seurat object.
#' @param feature Gene name. Only one gene is allowed.
#' @param celltypes Cell types of interest. By default, all cell types are included.
#' @param groups Groups selected for plotting. Support multiple groups.
#' @param add.dot Whether or not to add points on the violins.
#' @param font.size Font size for the labels.
#' @param pt.size Point size for the data points on the violin
#' @param pt.size Point size for the data points on the violin.
#' @param splitby Group to split the gene expression. Only works when length(groups)==1.
#' @param strip.color Colors for the strip background
#' @param alpha Point transparency. value from 0 to 1.
#' @param strip.color Colors for the strip background.
#' @return A ggplot object
#' @export


complex_vlnplot_single <- function(
  seu_obj,
  feature,
@@ -28,6 +30,7 @@ complex_vlnplot_single <- function(
  font.size=14,
  pt.size=0.1,
  splitby=NULL,
  alpha=0.5,
  strip.color=NULL
){
  if(length(feature)>1){
@@ -50,80 +53,38 @@ complex_vlnplot_single <- function(
  set.seed(seed = 42)
  noise <- rnorm(n = length(x = gene_count[,feature])) / 100000 ## This follows the same data processing for VlnPlot in Seurat
  gene_count[, feature]<-gene_count[,feature]+noise
  if (length(groups)==1) {
    if(length(celltypes)==1){
      p<-ggplot(gene_count, aes_string(x=groups, y=feature, fill=groups))+
        geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=0.5, color="pink")+
        xlab("") + ylab("") + ggtitle(feature) +
        theme(panel.background = element_rect(fill = "white",colour = "black"),
              axis.title = element_text(size = font.size), 
              axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
              axis.text.y = element_text(size=(font.size-2)),
              legend.title = element_blank(),
              legend.position = 'none',
              strip.text = element_text( size = font.size),
              plot.title = element_text(size=(font.size+2), hjust = 0.5))+
        coord_cartesian(ylim = c(0, max_exp), clip = 'off') 
      if(add.dot){
        p = p + geom_quasirandom(size=pt.size, alpha=0.2)
      }
      if(!is.null(splitby)){
        p = p + facet_wrap(as.formula(paste("~", splitby)), scales = 'free_x')
        g <- change_strip_background(p, type = 'top',  strip.color = strip.color)
        print(grid::grid.draw(g))
      } else {
        p
      }
    } else {
      if(is.null(splitby)){
        plot_list<-list()
        for(i in 1:length(celltypes)){
          cell_count <- gene_count[gene_count$celltype==celltypes[i],]
          p<-ggplot(cell_count, aes_string(x=groups, y=feature, fill=groups))+
            geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=0.5, color="pink")+
            xlab("") + ylab(celltypes[i]) +
            theme(panel.background = element_rect(fill = "white",colour = "black"),
                  legend.position = "none", 
                  axis.text.x = element_blank(), 
                  axis.ticks.x = element_blank(), 
                  axis.title.y = element_text(size = font.size, angle = 0), 
                  axis.text.y = element_text(size = (font.size-2)),
                  plot.margin = unit(c(-0.5, 0, -0.5, 0), "cm") )+coord_cartesian(ylim = c(0, max_exp), clip = 'off') 
          if(add.dot){
            p = p + geom_quasirandom(size=pt.size, alpha=0.2)
          }
          plot_list[[i]]<-p
        }
        plot_list[[length(plot_list)]]<- plot_list[[length(plot_list)]] +
          theme(axis.text.x=element_text(angle = 45, hjust = 1, vjust = 1, size = font.size), axis.ticks.x = element_line())
        p<- patchwork::wrap_plots(plotlist = plot_list, ncol = 1)  + patchwork::plot_annotation(title = feature) & theme(plot.title = element_text(hjust = 0.5, size = (font.size +2)))
        p
      } else {
  gene_count$celltype<-factor(gene_count$celltype, levels = celltypes)
  if (length(groups)==1) {
    p<-ggplot(gene_count, aes_string(x = groups, y = feature, fill = groups)) +
          facet_grid(as.formula(paste("celltype","~", splitby)), scales = "free_x") +
          geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=0.5, color="pink")+
      geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=alpha, color="pink")+
      xlab("") +
          ylab(paste(feature,"expression")) +
      ylab("") +
      ggtitle(feature)+
      theme_bw() +
      theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            strip.text = element_text( size = font.size),
            axis.text.x = element_text(size=(font.size-2), angle = 45, hjust = 1, vjust = 1),
            axis.title.y = element_text(size = font.size),
                legend.position = "none") +coord_cartesian(ylim = c(0, max_exp), clip = 'off') 
            plot.title = element_text(size=font.size, face = "bold", hjust = 0.5),
            legend.position = "none") 
    if(add.dot){
      p = p + geom_quasirandom(size=pt.size, alpha=0.2)
    }
    if(is.null(splitby)){
      p <- p + facet_wrap(~celltype, ncol = 1, strip.position = "right")
      g <- change_strip_background(p, type = 'right',  strip.color = strip.color)
      print(grid::grid.draw(g))
    } else {
      p<-p + facet_grid(as.formula(paste("celltype","~", splitby)), scales = "free_x") 
      g <- change_strip_background(p, type = 'both',  strip.color = strip.color)
      print(grid::grid.draw(g))
    }
    }
    
  } else {
    if(!is.null(splitby)){
      stop("This function does not support spliting multiple groups. Plots will look too messy! Please select one group only in the 'groups' parameter if you want to use 'splitby'.")
    }
    if(length(celltypes)==1){
    all_levels<-list()
    for(i in 1:length(groups)){
      if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
@@ -133,10 +94,11 @@ complex_vlnplot_single <- function(
      all_levels[[i]]<-group_level
    }
    all_levels<-as.character(unlist(all_levels))
      gene_count<-reshape2::melt(gene_count[,c(feature, groups)], measure.vars  = groups)
    gene_count<-reshape2::melt(gene_count[,c(feature, groups, "celltype")], measure.vars  = groups)
    gene_count$value<-factor(gene_count$value, levels = all_levels)
    gene_count$celltype<-factor(gene_count$celltype, levels = celltypes)
    p<-ggplot(gene_count, aes_string(x="value", y=feature, fill="value"))+
        geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=0.5, color="pink")+
      geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=alpha, color="pink")+
      xlab("") + ylab("") + ggtitle(feature) +
      theme(panel.background = element_rect(fill = "white",colour = "black"),
            axis.title = element_text(size = font.size), 
@@ -145,44 +107,13 @@ complex_vlnplot_single <- function(
            strip.text = element_text( size = font.size),
            legend.title = element_blank(),
            legend.position = 'none',
              plot.title = element_text(size=(font.size+2), hjust = 0.5))+coord_cartesian(ylim = c(0, max_exp), clip = 'off')+
        facet_wrap(~variable, scales = 'free_x')
      if(add.dot){
        p = p + geom_quasirandom(size=pt.size, alpha=0.2)
      }
      p
    } else {
      plot_list1<-list()
      plot_list2<-list()
      for(i in 1:length(groups)){
        group=groups[i]
        cell_count<-gene_count[,c(feature, group, "celltype")]
        for(j in 1:length(celltypes)){
          cell_count2 <- cell_count[cell_count$celltype==celltypes[j],]
          p<-ggplot(cell_count2, aes_string(x=group, y=feature, fill=group))+
            geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=0.5, color="pink")+
            xlab("") + ylab(celltypes[j]) +
            theme(panel.background = element_rect(fill = "white",colour = "black"),
                  legend.position = "none", 
                  axis.text.x = element_blank(), 
                  axis.ticks.x = element_blank(), 
                  axis.title.y = element_text(size = font.size, angle = 0), 
                  axis.text.y = element_text(size = (font.size-2)),
                  plot.margin = unit(c(-0.5, 0, -0.5, 0), "cm") ) +coord_cartesian(ylim = c(0, max_exp), clip = 'off')
            plot.title = element_text(size=(font.size),face = "bold", hjust = 0.5))+
      facet_grid(celltype~variable, scales = 'free_x')
    if(add.dot){
      p = p + geom_quasirandom(size=pt.size, alpha=0.2)
    }
          plot_list1[[j]]<-p
        }
        plot_list1[[length(plot_list1)]]<- plot_list1[[length(plot_list1)]] +
          theme(axis.text.x=element_text(angle = 45, hjust = 1, vjust = 1, size = font.size), axis.ticks.x = element_line(), axis.title.x = element_text(size=font.size))+ xlab(group)
        p2<-patchwork::wrap_plots(plotlist = plot_list1, ncol = 1) 
        plot_list2[[i]]<-p2
        plot_list1<-list()
      }
      p<-patchwork::wrap_plots(plotlist = plot_list2) + patchwork::plot_annotation(title = feature) & theme(plot.title = element_text(hjust = 0.5, size = (font.size+2)))
      p
    }
    g <- change_strip_background(p, type = 'both',  strip.color = strip.color)
    print(grid::grid.draw(g))
  }
}

@@ -200,6 +131,7 @@ complex_vlnplot_single <- function(
#' @param font.size Font size for the labels.
#' @param pt.size Point size for the data points on the violin
#' @param alpha Point transparency. value from 0 to 1.
#' @param strip.color Colors for the strip background
#' @return A ggplot object
#' @export

@@ -209,9 +141,10 @@ complex_vlnplot_multiple <- function(
  celltypes=NULL,
  group,
  add.dot = T,
  font.size=14,
  font.size=12,
  pt.size=0.1,
  alpha=0.01
  alpha=0.01,
  strip.color = NULL
){
  if(length(features)<2){
    stop("At least two genes are required. For single gene violin plot, please use complex_vlnplot_single instead.")
@@ -239,24 +172,23 @@ complex_vlnplot_multiple <- function(
                               variable.name = "Genes", value.name = "Expr")
  gene_count[, group]<-factor(gene_count[, group], levels = group_level)
  gene_count[, "celltype"]<-factor(gene_count[, "celltype"], levels = celltypes)
  ##The plot is modified from https://github.com/ycl6/StackedVlnPlot
  p<-ggplot(gene_count, aes_string(group, "Expr", fill = group)) +
    geom_violin(scale = 'width', adjust = 1, trim = TRUE, size=0.3, alpha=0.5, color="pink") +
    scale_y_continuous(expand = c(0, 0), position="right", labels = function(x)
    scale_y_continuous(expand = c(0, 0), position="left", labels = function(x)
      c(rep(x = "", times = length(x)-2), x[length(x) - 1], "")) +
    facet_grid(rows = vars(Genes), cols = vars(celltype),scales = "free", switch = "y") +
    theme_cowplot(font_size = 12) +
    theme(legend.position = "none", panel.spacing = unit(0, "lines"),
          plot.title = element_text(hjust = 0.5),
          panel.background = element_rect(fill = NA, color = "black"),
          axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          strip.background = element_blank(),
          strip.text = element_text(face = "bold"),
          strip.text.y.left = element_text(angle = 0)) +
    theme(panel.background = element_rect(fill = "white",colour = "black"),
          axis.title = element_text(size = font.size), 
          axis.text.x = element_text(size = font.size, angle = 45, hjust = 1, vjust = 1),
          axis.text.y = element_text(size=(font.size)),
          strip.text = element_text( size = font.size),
          legend.title = element_blank(),
          legend.position = 'none') +
    facet_grid(celltype~Genes, scales =  'free_x') +
    xlab("") + ylab("")
  if(add.dot){
    p = p + geom_quasirandom(size=pt.size, alpha=alpha)
  }
  p
  g <- change_strip_background(p, type = 'both',  strip.color = strip.color)
  print(grid::grid.draw(g))
}
+7 −1
Original line number Diff line number Diff line
@@ -385,7 +385,7 @@ firstup <- function(

#' A function to change the strip background color in ggplot
#' @param ggplt_obj A ggplot object
#' @param type Strip on the "top" side only or "both" sides
#' @param type Strip on the "top" or "right" side only or "both" sides
#' @param strip.color A color vector
#' @export
#' 
@@ -401,6 +401,12 @@ change_strip_background <- function(
    if(is.null(fills)){
    fills<- scales::hue_pal(l=90)(length(strip_both))
    }
  } else if(type=="right"){
    strip_both <- which(grepl('strip-r', g$layout$name))
    fills<-strip.color
    if(is.null(fills)){
      fills<- scales::hue_pal(l=90)(length(strip_both))
    }
  } else {
    strip_t <- which(grepl('strip-t', g$layout$name))
    strip_r <- which(grepl('strip-r', g$layout$name))
+3 −3
Original line number Diff line number Diff line
@@ -90,7 +90,7 @@ dev.off()
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_single_split.png) <br />
#### One gene/multiple group factors violin plot:
```
png(filename =  'vlnplot_multiple.png', width = 8, height = 6,units = 'in', res = 300)
png(filename =  'vlnplot_multiple.png', width = 6, height = 6,units = 'in', res = 300)
complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = c("Group","Replicates"),celltypes   = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"))
dev.off()
```
@@ -101,7 +101,7 @@ Note that the Replicates group here is for demo purpose. This is not the mouse I
#### Multiple genes/one group factor violin plot:
```
png(filename =  'vlnplot_multiple_genes.png', width = 8, height = 6,units = 'in', res = 300)
complex_vlnplot_multiple(iri.integrated, features = c("Havcr1",  "Slc34a1", "Vcam1",   "Krt20"  , "Slc7a13", "Slc5a12"), celltypes = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), group = "Group", add.dot=T, pt.size=0.001, alpha=0.001)
complex_vlnplot_multiple(iri.integrated, features = c("Havcr1",  "Slc34a1", "Vcam1",   "Krt20"  , "Slc7a13", "Slc5a12"), celltypes = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), group = "Group", add.dot=T, pt.size=0.01, alpha=0.01, font.size = 10)
dev.off()
```
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple_genes.png) <br />
@@ -156,7 +156,7 @@ help(package = plot1cell)
Many more functions will be added in the future package development. For questions, please raise an issue in this github page or contact <a href="https://humphreyslab.com">TheHumphreysLab</a>. 

### 9. Attributions
This package uses many methods from Seurat (https://github.com/satijalab/seurat) to process the data for ploting. The circlize and heatmap plots were generated by the circlize (https://github.com/jokergoo/circlize) and ComplexHeatmap (https://github.com/jokergoo/ComplexHeatmap) packages. The Upset plot was generated by the ComplexUpset package (https://github.com/krassowski/complex-upset). The violin plot to show multiple genes across groups was inspired by the codes provided here (https://github.com/ycl6/StackedVlnPlot). Most of the graphs were generated by ggplot2 (https://github.com/tidyverse/ggplot2). The package benefits from the following dependencies.
This package uses many methods from Seurat (https://github.com/satijalab/seurat) to process the data for ploting. The circlize and heatmap plots were generated by the circlize (https://github.com/jokergoo/circlize) and ComplexHeatmap (https://github.com/jokergoo/ComplexHeatmap) packages. The Upset plot was generated by the ComplexUpset package (https://github.com/krassowski/complex-upset). Most of the graphs were generated by ggplot2 (https://github.com/tidyverse/ggplot2). The package benefits from the following dependencies.

```
    Seurat,
−19.2 KiB (293 KiB)
Loading image diff...
+207 KiB (442 KiB)
Loading image diff...
Loading