Commit e551dc3d authored by cziegenhain's avatar cziegenhain
Browse files

small fixes

parent 1e6ace96
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -31,7 +31,7 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to do this automatically; otherwise, convert the zUMIs output to a loom file by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
18 Sept 2020: zUMIs.2.9.4b/c: Fix Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases.
18 Sept 2020: zUMIs.2.9.4b/c/d: Fix & speed up Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases. Prevent too much CPU usage in UMI error correction.

12 Sept 2020: [zUMIs2.9.4](https://github.com/sdparekh/zUMIs/releases/tag/2.9.4): Significantly speed up writing of error-corrected UMI tags to the bam file. Prevent potential crash when no cells meet any user-defined downsampling criteria.

+2 −1
Original line number Diff line number Diff line
@@ -185,6 +185,7 @@ hammingFilter<-function(umiseq, edit=1, gbcid=NULL){
}

ham_helper_fun <- function(x){
    setDTthreads(1)
    x[, gbcid := paste(RG,GE,sep="_")]
    x_list <- split(x = x, drop = T, by = c("gbcid"), sorted = T, keep.by = T)
    out_list <- lapply(x_list, function(x) hammingFilter(x[!is.na(UB)]$UB, edit=opt$counting_opts$Ham_Dist, gbcid=unique(x$gbcid)) )
+1 −0
Original line number Diff line number Diff line
@@ -165,6 +165,7 @@ setDownSamplingOption<-function( down ,bccount, filename=NULL){
    
    #first check if a partial barcode matches the length of the whitelist
    bc_definition <- sapply(opt$sequence_files, function(x) grep("BC", x$base_definition, value = T))
    bc_definition <- bc_definition[which(sapply(bc_definition, length)>0)]
    if(length(bc_definition)>1){ #this only makes sense if there are at least 2 BC pieces defined
      bc_definition <- sapply(bc_definition, function(x) substr(x = x, start = 4, stop = nchar(x)-1))
      bc_len_mat <- t(matrix(as.numeric(unlist(strsplit(bc_definition, "-"))), ncol = length(bc_definition)))
+27 −6
Original line number Diff line number Diff line
#!/usr/bin/env python3
import pysam
import argparse
import multiprocessing as mp

def load_bcs(bcpath):
    with open(bcpath) as f:
@@ -12,15 +13,14 @@ def load_bcs(bcpath):
            bc.append(l[0])
    return(bc)

def count_UMItags(inpath, bcs, threads, outpath):
def count_UMItags(inpath, bcs, chr):
    bccounts = {}
    for b in bcs:
        bccounts[b] = {}
        bccounts[b]['umi'] = 0
        bccounts[b]['int'] = 0

    inp = pysam.AlignmentFile(inpath, 'rb', threads = threads)
    for read in inp:
    inp = pysam.AlignmentFile(inpath, 'rb')
    for read in inp.fetch(chr):
        bc = read.get_tag('BC')
        if bc in bcs:
            ub = read.get_tag('UB')
@@ -28,8 +28,15 @@ def count_UMItags(inpath, bcs, threads, outpath):
                bccounts[bc]['int'] += 1
            else:
                 bccounts[bc]['umi'] += 1

    inp.close()
    return(bccounts)


def collect_write_stats(chrcounts, outpath):
    bccounts = chrcounts.pop(0) #get first dict
    for b in bccounts: #for every cell collect counts
        bccounts[b]['umi'] += sum( [chrcounts[i][b]['umi'] for i in range(len(chrcounts))] )
        bccounts[b]['int'] += sum( [chrcounts[i][b]['int'] for i in range(len(chrcounts))] )
    with open(outpath, 'w') as out:
        out.write('XC\tnNontagged\tnUMItag\n')
        for bc in bccounts:
@@ -50,7 +57,21 @@ def main():

    bcs = load_bcs(args.bcs)

    count_UMItags(inpath = args.bam, bcs = bcs, threads = args.p, outpath = args.bcs+".BCUMIstats.txt")
    inp = pysam.AlignmentFile(args.bam, 'rb')
    chrs = list(inp.references)
    chrs.append('*') #don't forget unmapped reads
    inp.close()

    if(args.p > len(chrs)):
        num_threads = len(chrs)
    else:
        num_threads = args.p

    pool = mp.Pool(num_threads)
    results = [pool.apply_async(count_UMItags, (args.bam, bcs, chr, )) for chr in chrs]
    x = [r.get() for r in results]

    collect_write_stats(x, args.bcs+".BCUMIstats.txt")

if __name__ == "__main__":
    main()
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.9.4c
vers=2.9.4d
currentv=$(curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/main/zUMIs.sh | grep '^vers=' | cut -f2 -d "=")
if [ "$currentv" != "$vers" ] ; then
    echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------";