small fixes (e551dc3d) · Commits · github_fork / ZUMIs

README.md

+1 −1

Original line number	Diff line number	Diff line
		@@ -31,7 +31,7 @@ We provide a script to convert zUMIs output into loom file automatically based o
		zUMIs will try to do this automatically; otherwise, convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

		## Changelog
		18 Sept 2020: zUMIs.2.9.4b/c: Fix Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases.
		18 Sept 2020: zUMIs.2.9.4b/c/d: Fix & speed up Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases. Prevent too much CPU usage in UMI error correction.

		12 Sept 2020: [zUMIs2.9.4](https://github.com/sdparekh/zUMIs/releases/tag/2.9.4): Significantly speed up writing of error-corrected UMI tags to the bam file. Prevent potential crash when no cells meet any user-defined downsampling criteria.

UMIstuffFUN.R

+2 −1

Original line number	Diff line number	Diff line
		@@ -185,6 +185,7 @@ hammingFilter<-function(umiseq, edit=1, gbcid=NULL){
		}

		ham_helper_fun <- function(x){
		setDTthreads(1)
		x[, gbcid := paste(RG,GE,sep="_")]
		x_list <- split(x = x, drop = T, by = c("gbcid"), sorted = T, keep.by = T)
		out_list <- lapply(x_list, function(x) hammingFilter(x[!is.na(UB)]$UB, edit=opt$counting_opts$Ham_Dist, gbcid=unique(x$gbcid)) )

barcodeIDFUN.R

+1 −0

Original line number	Diff line number	Diff line
		@@ -165,6 +165,7 @@ setDownSamplingOption<-function( down ,bccount, filename=NULL){

		#first check if a partial barcode matches the length of the whitelist
		bc_definition <- sapply(opt$sequence_files, function(x) grep("BC", x$base_definition, value = T))
		bc_definition <- bc_definition[which(sapply(bc_definition, length)>0)]
		if(length(bc_definition)>1){ #this only makes sense if there are at least 2 BC pieces defined
		bc_definition <- sapply(bc_definition, function(x) substr(x = x, start = 4, stop = nchar(x)-1))
		bc_len_mat <- t(matrix(as.numeric(unlist(strsplit(bc_definition, "-"))), ncol = length(bc_definition)))

misc/countUMIfrags.py

+27 −6

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3
		import pysam
		import argparse
		import multiprocessing as mp

		def load_bcs(bcpath):
		with open(bcpath) as f:
		@@ -12,15 +13,14 @@ def load_bcs(bcpath):
		bc.append(l[0])
		return(bc)

		def count_UMItags(inpath, bcs, threads, outpath):
		def count_UMItags(inpath, bcs, chr):
		bccounts = {}
		for b in bcs:
		bccounts[b] = {}
		bccounts[b]['umi'] = 0
		bccounts[b]['int'] = 0

		inp = pysam.AlignmentFile(inpath, 'rb', threads = threads)
		for read in inp:
		inp = pysam.AlignmentFile(inpath, 'rb')
		for read in inp.fetch(chr):
		bc = read.get_tag('BC')
		if bc in bcs:
		ub = read.get_tag('UB')
		@@ -28,8 +28,15 @@ def count_UMItags(inpath, bcs, threads, outpath):
		bccounts[bc]['int'] += 1
		else:
		bccounts[bc]['umi'] += 1

		inp.close()
		return(bccounts)


		def collect_write_stats(chrcounts, outpath):
		bccounts = chrcounts.pop(0) #get first dict
		for b in bccounts: #for every cell collect counts
		bccounts[b]['umi'] += sum( [chrcounts[i][b]['umi'] for i in range(len(chrcounts))] )
		bccounts[b]['int'] += sum( [chrcounts[i][b]['int'] for i in range(len(chrcounts))] )
		with open(outpath, 'w') as out:
		out.write('XC\tnNontagged\tnUMItag\n')
		for bc in bccounts:
		@@ -50,7 +57,21 @@ def main():

		bcs = load_bcs(args.bcs)

		count_UMItags(inpath = args.bam, bcs = bcs, threads = args.p, outpath = args.bcs+".BCUMIstats.txt")
		inp = pysam.AlignmentFile(args.bam, 'rb')
		chrs = list(inp.references)
		chrs.append('*') #don't forget unmapped reads
		inp.close()

		if(args.p > len(chrs)):
		num_threads = len(chrs)
		else:
		num_threads = args.p

		pool = mp.Pool(num_threads)
		results = [pool.apply_async(count_UMItags, (args.bam, bcs, chr, )) for chr in chrs]
		x = [r.get() for r in results]

		collect_write_stats(x, args.bcs+".BCUMIstats.txt")

		if __name__ == "__main__":
		main()

zUMIs.sh

+1 −1

Original line number	Diff line number	Diff line
		@@ -3,7 +3,7 @@
		# Pipeline to run UMI-seq analysis from fastq to read count tables.
		# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
		# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
		vers=2.9.4c
		vers=2.9.4d
		currentv=$(curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/main/zUMIs.sh | grep '^vers=' | cut -f2 -d "=")
		if [ "$currentv" != "$vers" ] ; then
		echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------";

Admin message