Commit b6b36125 authored by Christoph's avatar Christoph
Browse files

zUMIs2.7.2 new BC features

parent ff541068
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
27 Mar 2020: zUMIs2.7.2: New barcode handling functionalities: When using the intersection of automatic BC detection and a BC whitelist, and the full barcode is composed of several barcode pieces (e.g. RT barcode + Illumina barcode), the whitelist may now correspond to just one of the barcode pieces (e.g. an RT-barcode-only whitelist). Furthermore, some scRNA-seq protocols may have several cell barcodes that belong to the same cell (e.g. SPLiT-seq with oligo-dT/random-hex round 1 barcodes; i7 barcode mix in 10x Genomics). zUMIs now supports internally combining the counts via the `barcode_sharing:` option. Please look at the [wiki for further details](https://github.com/sdparekh/zUMIs/wiki/Barcodes#barcode-sharing-feature) and at [examples for some protocols](https://github.com/sdparekh/zUMIs/wiki/Protocol-specific-setup).
 
16 Mar 2020: zUMIs2.7.1: Smart-seq3 data can be run with the proper consideration of strand information. When setting `strand: 1`, UMI reads will use this strand while non-UMI reads will stay unstranded.

19 Feb 2020: [zUMIs2.7.0 released](https://github.com/sdparekh/zUMIs/releases/tag/2.7.0): Installation is greatly simplified by a conda-pack of miniconda with all dependencies. The conda environment is used with `zUMIs-master.sh -c`, with the old behavior staying as the default. 
+76 −25
Original line number Diff line number Diff line
@@ -155,7 +155,16 @@ setDownSamplingOption<-function( down ,bccount, filename=NULL){
    .barcode_plot(bccount,outfilename)
  }

  if(length(bccount[,XC] %in% bc_wl)>0){
  if(nchar(bccount[keep==TRUE,XC][1]) != nchar(bc_wl[1])){
    print("length of barcodes not equal to given barcode list, trying to match up...")
    search_vector <-  bccount[keep==TRUE,XC]
    bc_matched <- parallel::mcmapply(function(x) grep(pattern = x, x =search_vector), bc_wl, mc.cores = opt$num_threads, mc.preschedule = TRUE)
    bc_matched <- unlist(bc_matched)
    if(length(bc_matched)>0){
      to_remove <- search_vector[-bc_matched]
      bccount[XC %in% to_remove,keep:=FALSE]
    }
  }else if(sum(bccount[keep==TRUE,XC] %in% bc_wl)>0){
    bccount[ !(XC %in% bc_wl),keep:=FALSE]
  }else{
    warning("None of the frequent barcodes is present in the whitelist. Keep all automatically detected BCs.")
@@ -230,6 +239,7 @@ BCbin <- function(bccount_file, bc_detected) {
                                                                            !( XC %in% true_BCs )   ]
  nocell_BCs <- nocell_bccount[,XC]
  
  if(opt$barcodes$BarcodeBinning>0){
    #break up in pieces of 1000 real BCs in case the hamming distance calculation gets too large!
    true_chunks <- split(true_BCs, ceiling(seq_along(true_BCs)/1000))
    for(i in 1:length(true_chunks)){
@@ -255,7 +265,48 @@ BCbin <- function(bccount_file, bc_detected) {
                     , n_false := NULL][
                     , n_min := NULL][
                     , n := nocell_bccount[match(falseBC,nocell_bccount$XC),n]]
  }else{
    binmap <- data.table()
  }
  
  if(!is.null(opt$barcodes$barcode_sharing)){
    share_table <- data.table::fread( opt$barcodes$barcode_sharing, header = F, skip = 1)
    if(ncol(share_table) > 2){ #flatten table more if necessary
      share_table <- data.table::melt(share_table, id.vars = "V1")[,variable := NULL]
    }
    setnames(share_table, c("main_bc","shared_bc"))
    
    share_mode <- data.table::fread( opt$barcodes$barcode_sharing, header = F, nrows = 1)$V1
    share_mode <- as.numeric(unlist(strsplit(gsub(pattern = "#",replacement = "", x = share_mode),"-")))
    
    if(nrow(binmap)>0){ #first fix the noisy BC assignments so that they dont go into share barcodes
      binmap[, partial_bc := substr(trueBC,start = share_mode[1], stop = share_mode[2])] 
      binmap <- merge(binmap, share_table, all.x = TRUE, by.x = "partial_bc", by.y = "shared_bc")
      substr(binmap[!is.na(main_bc)]$trueBC,start = share_mode[1], stop = share_mode[2]) <- binmap[!is.na(main_bc)]$main_bc #replace to main barcode string
      binmap[,c("partial_bc", "main_bc") := NULL]
    }
    
    #now check for merging detected barcodes
    bc_detected[, partial_bc := substr(XC,start = share_mode[1], stop = share_mode[2])]
    bc_detected <- merge(bc_detected, share_table, all.x = TRUE, by.x = "partial_bc", by.y = "shared_bc")
    share_map <- bc_detected[!is.na(main_bc)]
    share_map[,trueBC := XC]
    substr(share_map$trueBC, start = share_mode[1], stop = share_mode[2]) <- share_map$main_bc
    setnames(share_map,"XC","falseBC")
    share_map[,c("partial_bc","main_bc","cellindex") := NULL][,hamming := 0]
    share_map <- share_map[,c("falseBC","hamming","trueBC","n"), with = FALSE]
    
    #messy move the shorter true bc list to the main R script
    bc_detected <- bc_detected[is.na(main_bc)]
    bc_detected[,c("partial_bc","main_bc") := NULL]
    bccount <<- bc_detected
    
    if(nrow(binmap)>0){
      binmap <- rbind(binmap, share_map)
    }else{
      binmap <- share_map
    }
  }
  print(paste("Found",nrow(binmap),"daughter barcodes that can be binned into",length(unique(binmap[,trueBC])),"parent barcodes."))
  print(paste("Binned barcodes correspond to",sum(binmap[,n]),"reads."))
  return(binmap)
+4 −1
Original line number Diff line number Diff line
@@ -12,6 +12,9 @@ setwd(opt$out_dir)
source(paste0(opt$zUMIs_directory,"/barcodeIDFUN.R"))
options(datatable.fread.input.cmd.message=FALSE)
data.table::setDTthreads(threads=opt$num_threads)
# Treat an empty `barcode_sharing` entry as "feature disabled" by dropping it,
# so downstream code can simply test is.null(opt$barcodes$barcode_sharing).
# isTRUE() keeps the check safe when the entry is missing altogether (NULL):
# a bare `NULL == ""` evaluates to logical(0), which makes if() abort with
# "argument is of length zero".
if(isTRUE(opt$barcodes$barcode_sharing == "")){
  opt$barcodes$barcode_sharing <- NULL
}

#######################################################################
#######################################################################
@@ -28,7 +31,7 @@ bccount<-cellBC(bcfile = opt$barcodes$barcode_file,
fwrite(bccount,file=paste0(opt$out_dir,"/zUMIs_output/",opt$project,"kept_barcodes.txt"))

#check if binning of adjacent barcodes should be run
if(opt$barcodes$BarcodeBinning > 0){
if(opt$barcodes$BarcodeBinning > 0 | !is.null(opt$barcodes$barcode_sharing)){
  binmap <- BCbin(bccount_file = paste0(opt$out_dir,"/", opt$project, ".BCstats.txt"),
                  bc_detected  = bccount)
  fwrite(binmap,file=paste0(opt$out_dir,"/zUMIs_output/",opt$project,".BCbinning.txt"))
+3 −0
Original line number Diff line number Diff line
@@ -154,6 +154,7 @@ ui <- fluidPage(
                            uiOutput("barcodeUI"),
                            numericInput("HamBC","Hamming distance collapsing of close cell barcode sequences.",value=1,min=0,max=5,step=1),
                            numericInput("nReadsBC","Keep only the cell barcodes with atleast n number of reads",value=100,min=1,max=5,step=1),
                            textInput("sharedBC",label = "Optional: Barcode Sharing (path to file):",value = NULL),
                            checkboxInput("demux", label = "Demultiplex into per-cell bam files?", value = F),
                            shinyBS::bsTooltip(id="demux", title = "Output files will be stored in zUMIs_output/demultiplexed/ .", 
                                               placement = "top", trigger = "hover",options = list(container = "body"))
@@ -391,6 +392,7 @@ server <- function(input, output, session) {
      "barcodes" = list(
        "barcode_num" = input$BCnum,
        "barcode_file" = input$BCfile,
        "barcode_sharing" = input$sharedBC,
        "automatic" = ifelse(input$barcodeChoice=="Automatic", TRUE, FALSE),
        "BarcodeBinning" = input$HamBC,
        "nReadsperCell" = input$nReadsBC,
@@ -512,6 +514,7 @@ server <- function(input, output, session) {

      updateNumericInput(session = session, inputId = "HamBC", value = ya$barcodes$BarcodeBinning)
      updateNumericInput(session = session, inputId = "nReadsBC", value = ya$barcodes$nReadsperCell)
      updateTextInput(session = session, inputId = "sharedBC", value = ya$barcodes$barcode_sharing)
      updateCheckboxInput(session = session, inputId = "demux", value = ya$barcodes$demultiplex)
      
      if(!is.null(ya$read_layout)){
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.7.1d
vers=2.7.2
# Fetch the version string currently published in zUMIs-master.sh on the master branch.
currentv=`curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/master/zUMIs-master.sh | grep '^vers=' | cut -f2 -d "="`
# Only announce an update when the lookup actually returned a version; an empty
# result (e.g. no network access) would otherwise always differ from $vers.
if [ -n "$currentv" ] && [ "$currentv" != "$vers" ]; then echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------"; fi

Loading