Commit 68150255 authored by cziegenhain's avatar cziegenhain
Browse files

samtools path

parent e551dc3d
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -31,7 +31,7 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
18 Sept 2020: zUMIs.2.9.4b/c/d: Fix & speed up Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases. Prevent too much CPU usage in UMI error correction.
18 Sept 2020: zUMIs.2.9.4b/c/d/e: Fix & speed up Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases. Prevent too much CPU usage in UMI error correction. Take correct samtools executable in gene annotation parsing.

12 Sept 2020: [zUMIs2.9.4](https://github.com/sdparekh/zUMIs/releases/tag/2.9.4): Speed writing of error-corrected UMI tags to bam file up significantly. Prevent potential crash when no cells meet any user-defined downsampling criteria.

+5 −5
Original line number Diff line number Diff line
@@ -18,8 +18,8 @@ suppressWarnings(suppressMessages(require(AnnotationDbi)))
# }

# reads information from Bam file on lengths and names of chromosomes/scaffolds. Can exclude scaffolds based on their size if desired.
.chromLengthFilter <- function(abamfile, keep_chr_minLength =0){
  bread<- paste("samtools view -H ", abamfile , "| grep '^@SQ' | cut -f2,3 ")
.chromLengthFilter <- function(abamfile, keep_chr_minLength =0, samtoolsexc){
  bread<- paste(samtoolsexc,"view -H ", abamfile , "| grep '^@SQ' | cut -f2,3 ")
  chr<-data.table::fread(bread,col.names = c("chr","len"), header = F)[
                              , chr2 :=gsub("SN:","",chr)][ 
                              , len2 :=gsub("LN:","",len)][
@@ -212,7 +212,7 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y,
}

#new version of makeSAF by Chris Alford
.makeSAF<-function(gtf, extension_var=FALSE, exon_extension=0, buffer_length=100, scaff_length=0,multi_overlap_var=FALSE){
.makeSAF<-function(gtf, extension_var=FALSE, exon_extension=0, buffer_length=100, scaff_length=0,multi_overlap_var=FALSE, samtoolsexc){
  print("Loading reference annotation from:")
  print(gtf)
  gtf_file <- plyranges::read_gff(gtf, col_names = c("type","gene_id"))
@@ -241,7 +241,7 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y,
  #Calculating total intergenic length for intron-probability
  totalGeneLength<-sum(width(gr.gene))
  #get length of chromosomes from BAM header
  chrL<-system( paste("samtools view -H", abamfile ," | grep '^@SQ' | cut -f3 "), intern = T)
  chrL<-system( paste(samtoolsexc,"view -H", abamfile ," | grep '^@SQ' | cut -f3 "), intern = T)
  chrL<-lapply(chrL, function(x){
    x<-as.integer(strsplit(x, ":")[[1]][[2]])
  })
@@ -249,7 +249,7 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y,
  intergenicBp<- chrL-totalGeneLength

  # getting names and length of chromosomes/scaffolds from bam header
  chrom_kept <- .chromLengthFilter(abamfile = abamfile, keep_chr_minLength = scaff_length)
  chrom_kept <- .chromLengthFilter(abamfile = abamfile, keep_chr_minLength = scaff_length, samtoolsexc = samtoolsexc)

  # reading out chromosomes/scaffold names from the GRange gtf object
  chrom_gtf_names <- tibble::as_tibble(x = list("chrom_names" = as.character(seqinfo(gtf_file)@seqnames)),
+2 −1
Original line number Diff line number Diff line
@@ -60,7 +60,8 @@ saf<-.makeSAF(gtf = paste0(opt$out_dir,"/",opt$project,".final_annot.gtf"),
              exon_extension = opt$reference$extension_length,
              buffer_length = (opt$reference$extension_length / 2),
              scaff_length = opt$reference$scaffold_length_min,
              multi_overlap_var = opt$counting_opts$multi_overlap)
              multi_overlap_var = opt$counting_opts$multi_overlap,
              samtoolsexc = samtoolsexc)
try(gene_name_mapping <- .get_gene_names(gtf = paste0(opt$out_dir,"/",opt$project,".final_annot.gtf"), threads = opt$num_threads), silent = TRUE)
try(data.table::fwrite(gene_name_mapping, file = paste0(opt$out_dir,"/zUMIs_output/expression/",opt$project,".gene_names.txt"), sep ="\t", quote = FALSE), silent = TRUE)
##
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.9.4d
vers=2.9.4e
currentv=$(curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/main/zUMIs.sh | grep '^vers=' | cut -f2 -d "=")
if [ "$currentv" != "$vers" ] ; then
    echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------";