Commit 0126333e authored by HaojiaWu's avatar HaojiaWu
Browse files

more updates

parent be3df337
Loading
Loading
Loading
Loading
+191 −69
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#' @param do.scale Whether or not to scale the dot when percentage expression of the gene is less than 20.
#' @return A ggplot object
#' @export

complex_dotplot_single <- function(
  seu_obj, 
  feature, 
@@ -30,17 +31,25 @@ complex_dotplot_single <- function(
  do.scale=T,
  scale.by='radius'
){
  groups_level<-levels(seu_obj@meta.data[,groups])
  if (is.null(groups_level)){
    seu_obj@meta.data[,groups] <-factor(seu_obj@meta.data[,groups], levels = names(table(seu_obj@meta.data[,groups])))
    groups_level<-levels(seu_obj@meta.data[,groups])
  if(is.null(color.palette)){
    color.palette <- colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255)
  }
  scale.func <- switch(
    EXPR = scale.by,
    'size' = scale_size,
    'radius' = scale_radius,
    stop("'scale.by' must be either 'size' or 'radius'")
  ) ### This function is from Seurat https://github.com/satijalab/seurat
  if(is.null(celltypes)){
    celltypes<-levels(seu_obj)
  }
  if(is.null(color.palette)){
    color.palette <- colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255)
  if(length(groups)==1){
    groups_level<-levels(seu_obj@meta.data[,groups])
    if (is.null(groups_level)){
      seu_obj@meta.data[,groups] <-factor(seu_obj@meta.data[,groups], levels = names(table(seu_obj@meta.data[,groups])))
      groups_level<-levels(seu_obj@meta.data[,groups])
    } 
    
    if(!is.null(splitby)){
      if (is.null(levels(seu_obj@meta.data[,splitby]))){
        seu_obj@meta.data[,splitby] <-factor(seu_obj@meta.data[,splitby], levels = names(table(seu_obj@meta.data[,splitby])))
@@ -72,12 +81,6 @@ complex_dotplot_single <- function(
      data_plot$groups <- factor(data_plot$groups, levels = groups_level)
      data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
    }
  scale.func <- switch(
    EXPR = scale.by,
    'size' = scale_size,
    'radius' = scale_radius,
    stop("'scale.by' must be either 'size' or 'radius'")
  ) ### This function is from Seurat https://github.com/satijalab/seurat
    data_plot$pct.exp <- round(100 * data_plot$pct.exp, 2)
    data_plot$avg.exp <- scale(data_plot$avg.exp)
    p<-ggplot(data_plot, aes(y = celltype, x = groups)) +  
@@ -109,8 +112,121 @@ complex_dotplot_single <- function(
    } else {
      p
    }
  } else {  ### group number greater than 1
    gene_count<-extract_gene_count(seu_obj=seu_obj, features = feature, cell.types = celltypes, meta.groups = c(groups, splitby))
    allgroups<-c(groups,splitby )
    for(i in 1:length(allgroups)){
      if (is.null(levels(seu_obj@meta.data[,allgroups[i]]))){
        seu_obj@meta.data[,allgroups[i]] <-factor(seu_obj@meta.data[,allgroups[i]], levels = names(table(seu_obj@meta.data[,allgroups[i]])))
      }
      group_level<-levels(seu_obj@meta.data[,allgroups[i]])
      gene_count[,allgroups[i]]<-factor(gene_count[,allgroups[i]],
                                        levels = group_level)
    }
    gene_count$celltype<-factor(gene_count$celltype, levels = celltypes)
    all_levels<-list()
    for(i in 1:length(groups)){
      if (is.null(levels(seu_obj@meta.data[,groups[i]]))){
        seu_obj@meta.data[,groups[i]] <-factor(seu_obj@meta.data[,groups[i]], levels = names(table(seu_obj@meta.data[,groups[i]])))
      }
      group_level<-levels(seu_obj@meta.data[,groups[i]])
      all_levels[[i]]<-group_level
    }
    all_levels<-as.character(unlist(all_levels))
    data_plot<-list()
    for(i in 1:length(groups)){
      count_df <- gene_count
      count_df$new_group<-paste(gene_count[,groups[i]], gene_count[,"celltype"],sep = "___")
      exp_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){mean(expm1(x))})
      pct_df<-aggregate(.~new_group, data=count_df[,c('new_group',feature)], FUN=function(x){length(x[x > 0]) / length(x)})
      colnames(exp_df)[2]<-"avg.exp"
      colnames(pct_df)[2]<-"pct.exp"
      df1<-merge(exp_df, pct_df, by='new_group')
      df1$groupID <- groups[i]
      data_plot[[i]] <- df1
    }
    data_plot <- do.call("rbind", data_plot)
    data_plot$groups <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[1]]}))
    data_plot$celltype <- as.character(lapply(X=strsplit(data_plot$new_group, split = "___"),FUN = function(x){x[[2]]}))
    data_plot$groups <- factor(data_plot$groups, levels = all_levels)
    data_plot$celltype <- factor(data_plot$celltype, levels = rev(celltypes))
    data_plot$groupID <- factor(data_plot$groupID, levels = groups)
    data_plot$pct.exp <- round(100 * data_plot$pct.exp, 2)
    data_plot$avg.exp <- scale(data_plot$avg.exp)
    if(is.null(splitby)){
      p<-ggplot(data_plot, aes(y = celltype, x = groups)) +  
        geom_tile(fill="white", color="white") +
        geom_point(aes( colour=avg.exp, size =pct.exp))  +  
        scale_color_gradientn(colours  =  color.palette )+ 
        theme(panel.background = element_rect(fill = "white", colour = "black"),
              axis.text.x = element_text(angle = 45, hjust = 1, size = font.size),
              plot.title = element_text(size = (font.size +2), hjust = 0.5, face = 'bold'),
              axis.text = element_text(size = font.size),
              legend.text=element_text(size=(font.size-2)),
              legend.title = element_text(size = (font.size)),
              strip.text = element_text( size = font.size),
              legend.position="right")+
        ylab("")+xlab("")+ggtitle(feature)+facet_wrap(~groupID, scales = 'free_x')
      if(do.scale){
        p = p + scale_size(range = c(0, 10))
      } else {
        if(max(data_plot$pct.exp)>=20){
          p = p + scale_size(range = c(0, 10))
        } else {
          p = p + scale.func(range = c(0, 10), limits = c(0, 20))
        }
      }
      g <- change_strip_background(p, type = 'top',  strip.color = strip.color)
      print(grid::grid.draw(g))
    } else {
      df2<-reshape2::melt(gene_count[,c(groups, splitby)], measure.vars  = groups)
      df2<-df2[!duplicated(df2$value),]
      colnames(df2)[colnames(df2) == "value"]<-"groups"
      data_plot2<-list()
      for(i in 1:length(groups)){
        df3<-data_plot[data_plot$groupID==groups[i],]
        df4<-df2[df2$variable==groups[i],c('groups', splitby[i])]
        colnames(df4)[2]<-"split"
        df5<-merge(df3, df4, by='groups')
        data_plot2[[i]]<-df5
      }
      data_plot2<-do.call("rbind", data_plot2)
      fill_x1<-grDevices::rainbow(length(groups), alpha = 0.5)
      fill_x2<-list()
      for(i in 1:length(splitby)){
        n_col<-unique(gene_count[, splitby[i]])
        fill_x2[[i]]<-scales::hue_pal(l=90)(length(n_col))
      }
      fill_x2<-as.character(unlist(fill_x2))
      fill_x <- c(fill_x1, fill_x2)
      p<-ggplot(data_plot2, aes(y = celltype, x = groups)) +  
        geom_tile(fill="white", color="white") +
        geom_point(aes( colour=avg.exp, size =pct.exp))  +  
        scale_color_gradientn(colours  =  color.palette )+ 
        theme(panel.background = element_rect(fill = "white", colour = "black"),
              axis.text.x = element_text(angle = 45, hjust = 1, size = font.size),
              plot.title = element_text(size = (font.size +2), hjust = 0.5, face = 'bold'),
              axis.text = element_text(size = font.size),
              legend.text=element_text(size=(font.size-2)),
              legend.title = element_text(size = (font.size)),
              strip.text = element_text( size = font.size),
              legend.position="right")+
        ylab("")+xlab("")+ggtitle(feature)+
        facet_nested(~ groupID + split, scales = "free_x", 
                     strip = strip_nested( background_x = elem_list_rect(fill = fill_x)))
      if(do.scale){
        p = p + scale_size(range = c(0, 10))
      } else {
        if(max(data_plot$pct.exp)>=20){
          p = p + scale_size(range = c(0, 10))
        } else {
          p = p + scale.func(range = c(0, 10), limits = c(0, 20))
        }
      }
      p
    }
  }
}


#' Plot multiple genes across groups
#'
@@ -122,15 +238,18 @@ complex_dotplot_single <- function(
#' @param seu_obj A complete Seurat object
#' @param features A vector of gene names.
#' @param celltypes Cell types to be included in the dot plot. Default: all cell types.
#' @param groups Group ID Must be one of the column names in the meta.data slot of the Seurat object.
#' @param groups Group ID must be one of the column names in the meta.data slot of the Seurat object.
#' @param color.palette Color for gene expression.
#' @param strip.color Colors for the strip background
#' @return A ggplot object
#' @export

complex_dotplot_multiple <- function(
  seu_obj, 
  features, 
  celltypes=NULL,
  groups, 
  color.palette = NULL,
  strip.color = NULL
  ){
 pb <- progress_bar$new(
@@ -150,11 +269,14 @@ complex_dotplot_multiple <- function(
  all_data<-do.call('rbind', plot_list)
  all_data$gene<-factor(all_data$gene, levels = rev(features)) 
  all_data$celltype <- factor(all_data$celltype, levels = levels(seu_obj))
  if(is.null(color.palette)){
    color.palette <- colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255)
  }
  p <- invisible(
    ggplot(all_data, aes(x = groups, y = gene)) +  
    geom_tile(fill="white", color="white") +
    geom_point(aes( colour=avg.exp, size =pct.exp), alpha=0.9)  +  
    scale_color_gradientn(colours  =  grDevices::colorRampPalette(c('grey80','lemonchiffon1','indianred1','darkred'))(255))+ 
    scale_color_gradientn(colours  =  color.palette)+ 
    scale_size(range = c(0, 10))+
    theme(panel.background = element_rect(fill = "white", colour = "black"),
          axis.text.x = element_text(angle = 45, hjust = 1),
+55 −25

File changed.

Preview size limit exceeded, changes collapsed.

+27 −13
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ dev.off()
Here is an example to use plot1cell to show one gene expression across different cell types across groups.
```
png(filename =  'dotplot_single.png', width = 4, height = 6,units = 'in', res = 100)
complex_dotplot_single(seu_obj = iri.integrated, feature = "Havcr1",groupby = "Group")
complex_dotplot_single(seu_obj = iri.integrated, feature = "Havcr1",groups = "Group")
dev.off()
```
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_single.png) <br />
@@ -58,15 +58,36 @@ If the group factor can be classified by another factor, complex_dotplot_single
iri.integrated@meta.data$Phase<-plyr::mapvalues(iri.integrated@meta.data$Group, from = levels(iri.integrated@meta.data$Group), to = c("Healthy",rep("Injury",3), rep("Recovery",2)))
iri.integrated@meta.data$Phase<-as.character(iri.integrated@meta.data$Phase)
png(filename =  'dotplot_single_split.png', width = 4, height = 6,units = 'in', res = 100)
complex_dotplot_single(iri.integrated, feature = "Havcr1",groupby = "Group",splitby = "Phase")
complex_dotplot_single(iri.integrated, feature = "Havcr1",groups = "Group",splitby = "Phase")
dev.off()
```
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_single_split.png) <br />

plot1cell also allows visualization of multiple genes in dotplot format. Here is an example.
To visualize the same gene on multiple group factors, simply add more group factor IDs to the "groups" argument.
```
png(filename =  'dotplot_more_groups.png', width = 8, height = 6,units = 'in', res = 100)
complex_dotplot_single(seu_obj = iri.integrated, feature = "Havcr1",groups= c("Group","Replicates"))
dev.off()
```
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_more_groups.png) <br />

Each group factor can be further splitted by its own factor by setting the splitby argument. Note that in this case, the order of the group factors needs to match the order of splitby factors.
```
iri.integrated@meta.data$ReplicateID<-plyr::mapvalues(iri.integrated@meta.data$Replicates, from = names(table((iri.integrated@meta.data$Replicates))), to = c(rep("Rep1",3),rep("Rep2",3), rep("Rep3",1)))
iri.integrated@meta.data$ReplicateID<-as.character(iri.integrated@meta.data$ReplicateID)

png(filename =  'dotplot_more_groups_split.png', width = 9, height = 6,units = 'in', res = 200)
complex_dotplot_single(seu_obj = iri.integrated, feature = "Havcr1",groups= c("Group","Replicates"), splitby = c("Phase","ReplicateID"))
dev.off()
### In this example, "Phase" is a splitby factor for "Group" and "ReplicateID" is a splitby factor for "Replicates".
```
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_more_groups_split.png) <br />
Note that the Replicates group here is just for showcase purpose. This is not a meaning group ID in our snRNA-seq dataset.

To visualize multiple genes in dotplot format, ```complex_dotplot_multiple``` should be used.
```
png(filename =  'dotplot_multiple.png', width = 10, height = 4,units = 'in', res = 300)
complex_dotplot_multiple(seu_obj = iri.integrated, features = c("Slc34a1","Slc7a13","Havcr1","Krt20","Vcam1"),groupby = "Group", celltypes = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"))
complex_dotplot_multiple(seu_obj = iri.integrated, features = c("Slc34a1","Slc7a13","Havcr1","Krt20","Vcam1"),group = "Group", celltypes = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"))
dev.off()
```
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/dotplot_multiple.png) <br />
@@ -96,24 +117,17 @@ dev.off()
```
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple.png) <br />

Each group factor can be further splitted by its own factor by setting the splitby argument. Note that in this case, the order of the group factors needs to match the order of splitby factors. For example:
Similar to the functionality in complex_dotplot, each group factor can also be splitted by another factor in violin plot. For example:
```
iri.integrated@meta.data$ReplicateID<-plyr::mapvalues(iri.integrated@meta.data$Replicates, from = names(table((iri.integrated@meta.data$Replicates))), to = c(rep("Rep1",3),rep("Rep2",3), rep("Rep3",1)))
iri.integrated@meta.data$ReplicateID<-as.character(iri.integrated@meta.data$ReplicateID)

png(filename =  'vlnplot_multiple_split.png', width = 7, height = 5,units = 'in', res = 200)
complex_vlnplot_single(iri.integrated, feature = "Havcr1", groups = c("Group","Replicates"),
                        celltypes   = c("PTS1" ,   "PTS2"  ,  "PTS3"  ,  "NewPT1" , "NewPT2"), 
                        font.size = 10, splitby = c("Phase","ReplicateID"), pt.size=0.05)
dev.off()

### In this example, "Phase" is a splitby factor for "Group" and "ReplicateID" is a splitby factor for "Replicates".

```

![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple_split.png) <br />

Note that the Replicates group here is just for showcase purpose. This is not a meaning group ID in our snRNA-seq dataset.

#### Multiple genes/one group factor violin plot:
```
@@ -124,7 +138,7 @@ dev.off()
![alt text](https://github.com/HaojiaWu/Plot1cell/blob/master/data/vlnplot_multiple_genes.png) <br />

#### Multiple genes/multiple group factors.
The violin plot will look too messy in this scenario so it is not included in plot1cell. It is highly recommended to use the complex_dot_plot instead. <br />
The violin plot will look too messy in this scenario so it is not included in plot1cell. <br />

### 4. Umap geneplot across groups
```
+89.6 KiB
Loading image diff...
+223 KiB
Loading image diff...
Loading