Commit 3cfbf37e authored by Christoph's avatar Christoph
Browse files

small optimizations

parent bfafb043
Loading
Loading
Loading
Loading
+30 −7
Original line number Diff line number Diff line
@@ -366,14 +366,15 @@ correct_UB_tags <- function(bccount, samtoolsexc){
    UB_cmd <- paste(pl_path,bam,bamout,mm,samtoolsexc)
    UB_cmd_list[[i]] <- UB_cmd
  }
  bla <- parallel::mclapply(UB_cmd_list, system ,mc.cores = opt$num_threads, mc.preschedule=F)
  bla <- parallel::mclapply(UB_cmd_list, system, ignore.stderr = TRUE, mc.cores = opt$num_threads, mc.preschedule=F, mc.silent = TRUE)
  UB_cmd_list <- unlist(UB_cmd_list)
  UB_files <- as.character(data.frame(strsplit(UB_cmd_list," "),stringsAsFactors=F)[3,])
  #UB_files <- paste(UB_files, collapse = " ")
  UB_mergelist <- paste0(opt$out_dir,"/zUMIs_output/molecule_mapping/",opt$project,"mergelist.txt")
  write(UB_files, file = UB_mergelist)
  outbam <- paste0(opt$out_dir,"/",opt$project,".filtered.Aligned.GeneTagged.UBcorrected.bam")
  merge_cmd <- paste(samtoolsexc,"merge -n -t BC -@",opt$num_threads,"-b",UB_mergelist,outbam)
  #merge_cmd <- paste(samtoolsexc,"merge -n -t BC -@",opt$num_threads,"-b",UB_mergelist,outbam)
  merge_cmd <- paste(samtoolsexc,"cat -b",UB_mergelist,"-o",outbam)
  write(merge_cmd, file = paste0(opt$out_dir,"/",opt$project,".merge.sh"))
  system(paste0("bash ",opt$out_dir,"/",opt$project,".merge.sh"))
  return(outbam)
@@ -390,15 +391,33 @@ demultiplex_bam <- function(opt, bamfile, nBCs){
    print("Using python implementation to demultiplex.")
    print(Sys.time())
    max_filehandles <- as.numeric(system("ulimit -n", intern = TRUE))
    if(max_filehandles < nBCs){
      print("Warning! You cannot open enough filehandles for demultiplexing! Increase ulimit -n")
    }
    threads_perBC <- floor(max_filehandles/nBCs)
    
    if(max_filehandles < nBCs | nBCs > 10000){
      #print("Warning! You cannot open enough filehandles for demultiplexing! Increase ulimit -n")
      #break up in several demultiplexing runs to avoid choke
      nchunks <- ifelse(max_filehandles < nBCs, no = ceiling(nBCs/10000), yes = ceiling(nBCs/(max_filehandles-100)))
      if(nchunks == 1){nchunks = 2}
      print(paste("Breaking up demultiplexing in",nchunks,"chunks. This may be because you have >10000 cells or a too low filehandle limit (ulimit -n)."))
      
      full_bclist <- paste0(opt$out_dir,"/zUMIs_output/",opt$project,"kept_barcodes.txt")
      bcsplit_prefix <- paste0(opt$out_dir,"/zUMIs_output/.",opt$project,"kept_barcodes.")
      
      split_cmd <- paste0("split -a 3 -n l/",nchunks," ",full_bclist, " ", bcsplit_prefix)
      system(split_cmd)
      bclist <- list.files(path = paste0(opt$out_dir,"/zUMIs_output/"), pattern =  paste0(".",opt$project,"kept_barcodes."),all.files = TRUE, full.names = TRUE)
      header_cmd <- paste('sed -i -e \'1s/^/XC,n,cellindex\\n/\'', bclist[-1], collapse = '; ', sep = ' ')
      system(header_cmd)
      
      if(max_filehandles < nBCs){threads_perBC <- 1}
    }else{
      bclist <- paste0(opt$out_dir,"/zUMIs_output/",opt$project,"kept_barcodes.txt")
    }
    
    if(threads_perBC > 2){
      threads_perBC <- 2
    }
    threads_decompress <- opt$num_threads - threads_perBC
    bclist <- paste0(opt$out_dir,"/zUMIs_output/",opt$project,"kept_barcodes.txt")
    outstub <- paste0(opt$out_dir,"/zUMIs_output/demultiplexed/",opt$project,".")
    py_script <- paste0(opt$zUMIs_directory,"/misc/demultiplex_BC.py")
    demux_cmd <- paste(
@@ -408,7 +427,7 @@ demultiplex_bam <- function(opt, bamfile, nBCs){
      "--bc", bclist,
      "--pout", threads_perBC,
      "--pin", threads_decompress
    )
    , collapse = "; ")
  }else{
    print("Using perl implementation to demultiplex.")
    demux_cmd <- paste0(opt$zUMIs_directory,"/misc/demultiplex_BC.pl ",opt$out_dir," ",opt$project, " ", bamfile, " ", samtoolsexc )
@@ -479,6 +498,10 @@ fixMissingOptions <- function(config){
    config$counting_opts$downsampling <- "0"
  }

  if(config$counting_opts$downsampling == FALSE){
    config$counting_opts$downsampling <- "0"
  }
  
  return(config)
}

+1 −1
Original line number Diff line number Diff line
@@ -97,7 +97,7 @@ samtools_output <- paste0(" | ",samtools," view -@ ",cores_samtools," -o ",inp$o
STAR_command <- paste(STAR_command,samtools_output)

#also merge the unmapped bam files:
sammerge_command <- paste(samtools,"merge -f -@",cores_samtools,paste0(inp$out_dir,"/",inp$project,".filtered.tagged.unmapped.bam"),paste0(filtered_bams,collapse=" "))
sammerge_command <- paste(samtools,"cat -o",paste0(inp$out_dir,"/",inp$project,".filtered.tagged.unmapped.bam"),paste0(filtered_bams,collapse=" "))

#finally, run STAR
system(paste(STAR_command,"&",sammerge_command,"& wait"))
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.7.1
vers=2.7.1b
currentv=`curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/master/zUMIs-master.sh | grep '^vers=' | cut -f2 -d "="`
if [ "$currentv" != "$vers" ]; then echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------"; fi