Commit 3d1926b8 authored by ziegenhain@bio.lmu.de's avatar ziegenhain@bio.lmu.de
Browse files

bug fixed & new shiny app to configure yaml files

parent 3ef70087
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ sumstatBAM <- function(featfiles,cores,outdir,user_seq,bc,outfile){
                               ][type=="NoFeatures",type:="Intergenic"
                               ][,XS:=NULL]
    saveRDS(mapCount,file=outfile)
    system(paste0("rm ",outdir,"/freadHeader"))
  return( mapCount )
}

zUMIs-config_shiny.R

0 → 100644
+299 −0
Original line number Diff line number Diff line
#
# This is a Shiny web application. You can run the application by clicking
# the 'Run App' button above.
#
# Find out more about building applications with Shiny here:
#
#    http://shiny.rstudio.com/
#

library(shiny)
library(yaml)

# Define UI for application
ui <- fluidPage(
   # Application title
   titlePanel("zUMIs-config: Generate yaml file"),
   navlistPanel(widths = c(2, 10),
     tabPanel("Mandatory Parameters",
     #set basic things
     fluidRow(
       column(6,wellPanel(
        textInput(inputId="runID", label="Name of the run/project", placeholder="eg: my_zUMIs_run")
      )),
      column(6,wellPanel(
        textInput(inputId="outDir", label="Full path to the output directory", placeholder="eg: /path/to/output")
      ))
     ),

     #STAR options


     # slider input for number of fastq reads
     fluidRow(
       h4("Input options:",style = "padding-left: 20px;"),
        column(4,wellPanel(
           sliderInput("nfiles", "Number of Input fastq files:", min = 1, max = 4,value = 2)
        )),
        column(8,wellPanel(
          uiOutput("fqUI")
        ))
     ),


     fluidRow(

       column(3,wellPanel(
         uiOutput("fqBCui")
       ),offset = 1),
       column(3,wellPanel(
         uiOutput("fqUMIui")
       )),
       column(3,wellPanel(
         uiOutput("fqCDNAui")
       ))
     ),

     fluidRow(
       h4("Mapping/Reference options:",style = "padding-left: 20px;"),
       column(6,wellPanel(
         textInput(inputId="STARpath", label="Full path to the STAR index directory", placeholder="eg: /path/to/output"),
         textInput(inputId="GTFpath", label="Full path to the annotation GTF file", placeholder="eg: /path/to/output"),
         textInput(inputId="STARparams", label="Optional: Additional mapping parameters for STAR", value=""),
         numericInput(inputId = "NUMadditionalFA",label = "Optional: Number of additional reference sequences (eg. ERCC.fa)",value = 0,min = 0,step = 1)
       )),
       column(6,wellPanel(
         p(strong("Additional references:")),
         uiOutput("refUI")
       ))
     )


   ),
   tabPanel("Optional Parameters",
    fluidRow(
      column(6,wellPanel(
        h4("General options:"),
        numericInput("numThreads","Number of CPU Threads:",value=8,min=1,step=1),
        numericInput("memLimit","Memory limit (Gb) 0 = no limit:",value=0,step=1),
        checkboxInput("makeStats",label="Produce summary statistics",value = T),
        checkboxInput("useSLURM",label="Use SLURM workload manager",value = F),
        selectInput("whichStage",label = "Start zUMIs from following stage:",choices = c("Filtering", "Mapping", "Counting", "Summarising"),selected = "Filtering",multiple = F)
      )),


      column(6,wellPanel(
        h4("Barcode & UMI filtering options:"),
        numericInput("BCbases","Number of barcode bases below quality:",value=1,min=1,step=1),
        numericInput("BCphred","Barcode Phred quality threshold:",value=20,min=1,max = 40,step=1),
        numericInput("UMIbases","Number of UMI bases below quality:",value=1,min=1,step=1),
        numericInput("UMIphred","UMI Phred quality threshold:",value=20,min=1,max = 40,step=1)
    )
   )),
   fluidRow(
     column(6,wellPanel(
       h4("Counting options:"),
       checkboxInput("countIntrons",label="Also count introns & exon+intron",value = T),
       textInput("downsamp",label = "Number of reads to downsample to. This value can be a fixed number of reads (e.g. 10000) or a desired range (e.g. 10000-20000). 0 invokes adaptive downsampling.",value = "0"),
       selectInput("strand",label = "Is the library stranded?",choices = c("unstranded" = 0, "positively stranded" = 1, "negatively stranded" = 2),selected = "unstranded",multiple = F),
       numericInput("HamDist","Hamming distance collapsing of UMI sequences:",value=0,min=0,max=5,step=1),
       checkboxInput("doVelocity",label="Generate velocyto-compatible counting of intron-exon spanning reads",value = F),
       checkboxInput("countPrimary",label="Count the primary Hits of multimapping reads towards gene expression levels?",value = T),
       checkboxInput("twoPass",label="Perform basic STAR twoPass mapping?",value = T)
     )),
     column(6,wellPanel(
       h4("Barcode options:"),
       radioButtons(inputId = "barcodeChoice",label = "Type of barcode selection:",choices = c("Automatic","Number of top Barcodes","Barcod whitelist")),
       uiOutput("barcodeUI"),
       numericInput("HamBC","Hamming distance collapsing of close cell barcode sequences. ATTENTION! This option is currently not implemented!",value=0,min=0,max=5,step=1),
       numericInput("nReadsBC","Keep only the cell barcodes with atleast n number of reads",value=100,min=1,max=5,step=1)

     ))
   )
),
tabPanel("Generate YAML!",
  p(strong("Your YAML is nearly ready...")),
  uiOutput("saveUI"),
  actionButton(inputId = "SaveYAML",label = "Save YAML!")
# ),
# tabPanel("Load YAML",
#   p(strong("You can load an existing YAML for zUMIs here")),
#   textInput(inputId="inYAML", label="Full path to YAML file", placeholder="eg: /path/to.yaml"),
#   actionButton(inputId = "LoadYAML",label = "Load YAML!")
)
))

# Define server logic
server <- function(input, output, session) {

  output$barcodeUI <- renderUI({
    switch(input$barcodeChoice,
       "Automatic" = p(em("Intact barcodes will be detected automatically.")),
       "Number of top Barcodes" = numericInput(inputId = "BCnum",label = "Number of barcodes to consider:",value = 100, min = 10, step = 1),
       "Barcod whitelist" = textInput(inputId = "BCfile",label = "File to barcode whitelist to use:", value = "/fullpath/to/file.txt")
    )
  })

  output$refUI <- renderUI({
    if(input$NUMadditionalFA>0){
      lapply(1:input$NUMadditionalFA, function(i) {
        textInput(inputId = paste0("FA_",i),label = paste("Full path to additional sequence",i))
      })
    }
  })

  output$fqUI <- renderUI({
    if(input$nfiles>0){
      lapply(1:input$nfiles, function(i) {
        textInput(inputId = paste0("fqpath_",i),label = paste("Full path to fastq file",i),placeholder = "/path/to/read.fq.gz")
      })
    }
  })

  output$fqBCui <- renderUI({
    # if(input$nfiles>0){
    #   lapply(1:input$nfiles, function(i) {
    #     checkboxGroupInput(inputId = paste0("fqptype_",i),choices = c("cDNA","BC","UMI"),label = paste("Content of read",i),inline = T)
    #   })
    # }
    lapply(1:input$nfiles, function(i) {
      textInput(inputId = paste0("BC_",i),label = paste("Read",i,"BC"),placeholder = "eg: 1-6")
    })

  })

  output$fqUMIui <- renderUI({
    lapply(1:input$nfiles, function(i) {
      textInput(inputId = paste0("UMI_",i),label = paste("Read",i,"UMI"),placeholder = "eg: 7-16")
    })
  })

  output$fqCDNAui <- renderUI({
    lapply(1:input$nfiles, function(i) {
      textInput(inputId = paste0("cDNA_",i),label = paste("Read",i,"cDNA"),placeholder = "eg: 1-50")
    })
  })


  output$saveUI <- renderUI({
    textInput(inputId = "savePath",label = "Save YAML file in this path:",value = paste0(input$outDir,"/",input$runID,".yaml"))
  })

  # output$basedefui <- renderUI({
  #   strong(paste("found"))
  #   lapply(1:input$nfiles, function(i){
  #     #strong(paste0('Hi, this is output B#', i))
  #
  #     #lapply(input$paste0("fqptype_",i), function(j){
  #       #textInput(inputId = tmp,label = paste("read",i,"input",j))
  #     #})
  #   })
  # })


  observeEvent(input$SaveYAML, {
    write_yaml(
      x = makeYAML(input),
      file = input$savePath
    )

  })

  makeYAML<-function(input){
    #collect fastq file information
    seqf<-list()
    for(i in 1:input$nfiles){
      bc_struc<-c(input[[paste0("cDNA_",i)]],input[[paste0("BC_",i)]],input[[paste0("UMI_",i)]])
      names(bc_struc)<-c("cDNA","BC","UMI")
      bc_struc<-bc_struc[which( bc_struc != "" )]

      seqf[[i]] <- list(
        "name" = input[[paste0("fqpath_",i)]],
        "base_definition" = paste0(names(bc_struc),"(",bc_struc,")")
      )
    }
    names(seqf) <- paste0("file",1:input$nfiles)

    #collect potential additional fasta files
    if(input$NUMadditionalFA>0){
      fa_list <- c()
      for(i in 1:input$NUMadditionalFA){
        fa_list <- c(fa_list,input[[paste0("FA_",i)]])
      }
    }else{
      fa_list<-NULL
    }

    #decide on barcode param
    #if(input$barcodeChoice)

    #make yaml list
    y <- list(
      "project" = input$runID,
      "sequence_files" = seqf,
      "reference" = list(
        "STAR_index" = input$STARpath,
        "GTF_file" = input$GTFpath,
        "additional_STAR_params" = input$STARparams,
        "additional_files" = fa_list
      ),
      "out_dir" = input$outDir,
      "num_threads" = input$numThreads,
      "mem_limit" = input$memLimit,
      "filter_cutoffs" = list(
        "BC_filter" = list(
          "num_bases" = input$BCbases,
          "phred" = input$BCphred
        ),
        "UMI_filter" = list(
          "num_bases" = input$UMIbases,
          "phred" = input$UMIphred
        )
      ),
      "barcodes" = list(
        "barcode_num" = input$BCnum,
        "barcode_file" = input$BCfile,
        "BarcodeBinning" = input$HamBC,
        "nReadsperCell" = input$nReadsBC
      ),
      "counting_opts" = list(
        "introns" = input$countIntrons,
        "downsampling" = input$downsamp,
        "strand" = as.integer(input$strand),
        "Ham_Dist" = input$HamDist,
        "velocyto" = input$doVelocity,
        "primaryHit" = input$countPrimary,
        "twoPass" = input$twoPass
      ),
      "make_stats" = input$makeStats,
      "use_SLURM" = input$useSLURM,
      "which_Stage" = input$whichStage
    )
    return(y)
  }

  observeEvent(input$LoadYAML, {
    if(file.exists(input$inYAML)){
      ya <- read_yaml(file = input$inYAML)
      updateTextInput(session = session,inputId = "runID",value = ya$project)
      updateTextInput(session = session,inputId = "outDir",value = ya$out_dir)
      updateTextInput(session = session,inputId = "STARpath",value = ya$reference$STAR_index)
      updateTextInput(session = session,inputId = "GTFpath",value = ya$reference$GTF_file)
      updateTextInput(session = session,inputId = "STARparams",value = ya$reference$additional_STAR_params)
      updateNumericInput(session = session, inputId = "NUMadditionalFA", value = length(ya$reference$additional_files))
      if(length(ya$reference$additional_files)>0){
        for(i in 1:length(ya$reference$additional_files)){
          updateTextInput(session = session,inputId = paste0("FA_",i), value = ya$reference$additional_files[i])
          print(input[[paste0("FA_",i)]])
        }
      }

    }else{
      print("File doesn't exist!")
    }
  })

}

# Run the application
shinyApp(ui = ui, server = server)
+10 −6
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ cDNA_read_length <- getmode(nchar(cDNA_peek$V1))

# Setup STAR mapping ------------------------------------------------------

param_defaults <- "--readFilesCommand samtools view --outSAMmultNmax 1 --outFilterMultimapNmax 50 --outSAMunmapped Within --outSAMtype BAM Unsorted --twopassMode Basic"
param_defaults <- "--readFilesCommand samtools view --outSAMmultNmax 1 --outFilterMultimapNmax 50 --outSAMunmapped Within --outSAMtype BAM Unsorted"
param_misc <- paste("--genomeDir",inp$reference$STAR_index,
                    "--sjdbGTFfile",gtf_to_use,
                    "--runThreadN",inp$num_threads,
@@ -73,6 +73,10 @@ param_misc <- paste("--genomeDir",inp$reference$STAR_index,
                    "--readFilesType SAM",inp$read_layout)

STAR_command <- paste(STAR_exec,param_defaults,param_misc,inp$reference$additional_STAR_params,param_additional_fa)
if(inp$reference$twoPass=T){
  STAR_command <- paste(STAR_command,"--twopassMode Basic")
}


#finally, run STAR
system(STAR_command)
+3 −3
Original line number Diff line number Diff line
@@ -68,10 +68,11 @@ typeCount <- sumstatBAM(featfiles=c(paste0(opt$out_dir,"/",opt$project,".filtere
                         outfile = paste0(opt$out_dir,"/zUMIs_output/stats/",opt$project,".bc.READcounts.rds"))
 
#only print per BC mapping stats if there are fewer than 200 BCs

if(length( unique(typeCount$RG))<=200  ){
tc<-data.frame(typeCount)
tc$type<-factor(tc$type, levels=rev(c("Exon","Intron","Intergenic","Ambiguity","Unmapped","User")))
write.table(tc,file = paste(opt$out_dir,"/zUMIs_output/stats/",opt$project,".readspercell.txt",sep=""),sep="\t",row.names = F,col.names = T)

if(length( unique(typeCount$RG))<=200  ){
  p <- ggplot( tc, aes(x=reorder(RG,N), y=N,fill=type))+
        geom_bar(stat = "identity",alpha=0.9,width=0.7) +
        xlab("") + 
@@ -85,7 +86,6 @@ if(length( unique(typeCount$RG))<=200 ){
           legend.title = element_blank())

  ggsave(p,filename = paste(opt$out_dir,"/zUMIs_output/stats/",opt$project,".readspercell.pdf",sep=""),width = 10,height = 7)
  write.table(tc,file = paste(opt$out_dir,"/zUMIs_output/stats/",opt$project,".readspercell.txt",sep=""),sep="\t",row.names = F,col.names = T)
}

##### Reads per cell based on their assignment type ###3
+1 −1
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ counting_opts:
  Ham_Dist: 0 #Hamming distance collapsing of UMI sequences.
  velocyto: no #Would you like velocyto-compatible counting of intron-exon spanning reads
  primaryHit: yes #Do you want to count the primary Hits of multimapping reads towards gene expression levels?
  twoPass: yes #perform basic STAR twoPass mapping

#produce stats files and plots?
make_stats: yes
@@ -81,4 +82,3 @@ use_SLURM: no
which_Stage: Filtering

#below, the fqfilter will add a read_layout flag defining SE or PE