Commit bfafb043 authored by Christoph's avatar Christoph
Browse files

Smart3 strandedness

parent 7235a89c
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
16 Mar 2020: zUMIs2.7.1: Smart-seq3 data can be run with the proper consideration of strand information. When setting `strand: 1`, UMI reads will use this strand while non-UMI reads will stay unstranded.

19 Feb 2020: [zUMIs2.7.0 released](https://github.com/sdparekh/zUMIs/releases/tag/2.7.0): Simplify installation greatly by a cond-pack of miniconda with all dependencies. The conda environment is used with `zUMIs-master.sh -c`, with the old behavior staying as the default. 

13 Feb 2020: zUMIs2.6.3: Faster demultiplexing using python/pysam. Made forking of UMI collapse worker threads more robust.
+13 −0
Original line number Diff line number Diff line
@@ -417,6 +417,19 @@ demultiplex_bam <- function(opt, bamfile, nBCs){
  print(Sys.time())
}

split_bam <- function(bam, cpu, samtoolsexc){
  UMIbam <- paste0(bam,".UMI.bam")
  internalbam <- paste0(bam,".internal.bam")
  cpus <- floor((cpu-4)/2)
  if(cpus<1){
    cpus <- 2
  }
  
  cmd_umi <- paste(samtoolsexc, "view -h -@ 2", bam, " | grep -v -P 'UB:Z:\t' | ", samtoolsexc, "view -b -@",cpus,"-o",UMIbam,"&")
  cmd_internal <- paste(samtoolsexc, "view -h -@ 2", bam, " | grep -v 'UB:Z:[A-Z]' | ", samtoolsexc, "view -b -@",cpus,"-o",internalbam,"&")
  system(paste(cmd_umi,cmd_internal,"wait"))
  return(c(internalbam,UMIbam))
}

fixMissingOptions <- function(config){
  if(is.null(config$barcodes$automatic)){
+46 −24
Original line number Diff line number Diff line
@@ -52,6 +52,28 @@ bccount<-splitRG(bccount=bccount, mem= opt$mem_limit)

saf<-.makeSAF(paste0(opt$out_dir,"/",opt$project,".final_annot.gtf"))
abamfile<-paste0(opt$out_dir,"/",opt$project,".filtered.tagged.Aligned.out.bam")
outbamfile <-paste0(opt$out_dir,"/",opt$project,".filtered.Aligned.GeneTagged.bam")

if(smart3_flag & opt$counting_opts$strand == 1){
  #split bam in UMU ends and internal
  tmp_bams <- split_bam(bam = abamfile, cpu = opt$num_threads, samtoolsexc=samtoolsexc)
  
  #assign features with appropriate strand
  fnex_int<-.runFeatureCount(tmp_bams[1], saf=saf$exons, strand=0, type="ex", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib)
  fnex_umi<-.runFeatureCount(tmp_bams[2], saf=saf$exons, strand=1, type="ex", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib)
  ffiles_int <- paste0(fnex_int,".tmp")
  ffiles_umi <- paste0(fnex_umi,".tmp")
  
  if(opt$counting_opts$introns){
    fnin_int<-.runFeatureCount(ffiles_int, saf=saf$introns, strand=0, type="in", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib)
    fnin_umi<-.runFeatureCount(ffiles_umi, saf=saf$introns, strand=1, type="in", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib)
    ffiles_int <- paste0(fnin_int,".tmp")
    ffiles_umi <- paste0(fnin_umi,".tmp")
  }
  join_bam_cmd <- paste(samtoolsexc, "cat -o", outbamfile, ffiles_int, ffiles_umi)
  system(join_bam_cmd)
  system(paste0("rm ",tmp_bams[1],"* ",tmp_bams[2],"*"))
}else{
  fnex<-.runFeatureCount(abamfile,
                         saf=saf$exons,
                         strand=opt$counting_opts$strand,
@@ -75,15 +97,15 @@ if(opt$counting_opts$introns){
    ffiles<-paste0(fnin,".tmp")
  }
  
  system(paste("mv",ffiles,outbamfile))
}

if(is.null(opt$mem_limit)){
  mempercpu <- max(round(100/opt$num_threads,0),1)
}else{
  mempercpu <- max(round(opt$mem_limit/opt$num_threads,0),1)
}

outbamfile <-paste0(opt$out_dir,"/",opt$project,".filtered.Aligned.GeneTagged.bam")
system(paste("mv",ffiles,outbamfile))

if(opt$counting_opts$Ham_Dist == 0){
  sortbamfile <-paste0(opt$out_dir,"/",opt$project,".filtered.Aligned.GeneTagged.sorted.bam")
}else{
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.7.0b
vers=2.7.1
currentv=`curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/master/zUMIs-master.sh | grep '^vers=' | cut -f2 -d "="`
if [ "$currentv" != "$vers" ]; then echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------"; fi