samtools path (68150255) · Commits · github_fork / ZUMIs

README.md

+1 −1

Original line number	Diff line number	Diff line
		@@ -31,7 +31,7 @@ We provide a script to convert zUMIs output into loom file automatically based o
		zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

		## Changelog
		18 Sept 2020: zUMIs.2.9.4b/c/d: Fix & speed up Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases. Prevent too much CPU usage in UMI error correction.
		18 Sept 2020: zUMIs.2.9.4b/c/d/e: Fix & speed up Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases. Prevent too much CPU usage in UMI error correction. Take correct samtools executable in gene annotation parsing.

		12 Sept 2020: [zUMIs2.9.4](https://github.com/sdparekh/zUMIs/releases/tag/2.9.4): Speed writing of error-corrected UMI tags to bam file up significantly. Prevent potential crash when no cells meet any user-defined downsampling criteria.

runfeatureCountFUN.R

+5 −5

Original line number	Diff line number	Diff line
		@@ -18,8 +18,8 @@ suppressWarnings(suppressMessages(require(AnnotationDbi)))
		# }

		# reads information from Bam file on lengths and names of chromosomes/scaffolds. Can exclude scaffolds based on their size if desired.
		.chromLengthFilter <- function(abamfile, keep_chr_minLength =0){
		bread<- paste("samtools view -H ", abamfile , "\| grep '^@SQ' \| cut -f2,3 ")
		.chromLengthFilter <- function(abamfile, keep_chr_minLength =0, samtoolsexc){
		bread<- paste(samtoolsexc,"view -H ", abamfile , "\| grep '^@SQ' \| cut -f2,3 ")
		chr<-data.table::fread(bread,col.names = c("chr","len"), header = F)[
		, chr2 :=gsub("SN:","",chr)][
		, len2 :=gsub("LN:","",len)][
		@@ -212,7 +212,7 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y,
		}

		#new version of makeSAF by Chris Alford
		.makeSAF<-function(gtf, extension_var=FALSE, exon_extension=0, buffer_length=100, scaff_length=0,multi_overlap_var=FALSE){
		.makeSAF<-function(gtf, extension_var=FALSE, exon_extension=0, buffer_length=100, scaff_length=0,multi_overlap_var=FALSE, samtoolsexc){
		print("Loading reference annotation from:")
		print(gtf)
		gtf_file <- plyranges::read_gff(gtf, col_names = c("type","gene_id"))
		@@ -241,7 +241,7 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y,
		#Calculating total intergenic length for intron-probability
		totalGeneLength<-sum(width(gr.gene))
		#get length of chromosomes from BAM header
		chrL<-system( paste("samtools view -H", abamfile ," \| grep '^@SQ' \| cut -f3 "), intern = T)
		chrL<-system( paste(samtoolsexc,"view -H", abamfile ," \| grep '^@SQ' \| cut -f3 "), intern = T)
		chrL<-lapply(chrL, function(x){
		x<-as.integer(strsplit(x, ":")[[1]][[2]])
		})
		@@ -249,7 +249,7 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y,
		intergenicBp<- chrL-totalGeneLength

		# getting names and length of chromosomes/scaffolds from bam header
		chrom_kept <- .chromLengthFilter(abamfile = abamfile, keep_chr_minLength = scaff_length)
		chrom_kept <- .chromLengthFilter(abamfile = abamfile, keep_chr_minLength = scaff_length, samtoolsexc = samtoolsexc)

		# reading out chromosomes/scaffold names from the GRange gtf object
		chrom_gtf_names <- tibble::as_tibble(x = list("chrom_names" = as.character(seqinfo(gtf_file)@seqnames)),

zUMIs-dge2.R

+2 −1

Original line number	Diff line number	Diff line
		@@ -60,7 +60,8 @@ saf<-.makeSAF(gtf = paste0(opt$out_dir,"/",opt$project,".final_annot.gtf"),
		exon_extension = opt$reference$extension_length,
		buffer_length = (opt$reference$extension_length / 2),
		scaff_length = opt$reference$scaffold_length_min,
		multi_overlap_var = opt$counting_opts$multi_overlap)
		multi_overlap_var = opt$counting_opts$multi_overlap,
		samtoolsexc = samtoolsexc)
		try(gene_name_mapping <- .get_gene_names(gtf = paste0(opt$out_dir,"/",opt$project,".final_annot.gtf"), threads = opt$num_threads), silent = TRUE)
		try(data.table::fwrite(gene_name_mapping, file = paste0(opt$out_dir,"/zUMIs_output/expression/",opt$project,".gene_names.txt"), sep ="\t", quote = FALSE), silent = TRUE)
		##

zUMIs.sh

+1 −1

Original line number	Diff line number	Diff line
		@@ -3,7 +3,7 @@
		# Pipeline to run UMI-seq analysis from fastq to read count tables.
		# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
		# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
		vers=2.9.4d
		vers=2.9.4e
		currentv=$(curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/main/zUMIs.sh \| grep '^vers=' \| cut -f2 -d "=")
		if [ "$currentv" != "$vers" ] ; then
		echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------";

Admin message