Commit 2462b46e authored by cziegenhain's avatar cziegenhain
Browse files

smart3 umi frag stats

parent 4e23bf92
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
17 Sept 2020: zUMIs.2.9.4b: Fix Smart-seq3 UMI read counting

12 Sept 2020: [zUMIs2.9.4](https://github.com/sdparekh/zUMIs/releases/tag/2.9.4): Speed writing of error-corrected UMI tags to bam file up significantly. Prevent potential crash when no cells meet any user-defined downsampling criteria.

19 July 2020: [zUMIs2.9.3](https://github.com/sdparekh/zUMIs/releases/tag/2.9.3): Add zUMIs version number to header of unmapped bam files. Several bug fixes: prevent error during mapping with memory handling; incorrect Smart-seq3 UMI-fragment counting.

fqfilter_countUMI.pl

deleted100755 → 0
+0 −80
Original line number Diff line number Diff line
#!/usr/bin/perl
use warnings;


# A script to count reads carrying UMI tags.
# Author: Swati Parekh
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se

if(@ARGV != 2)
{
print
"\n#####################################################################################
Usage: perl $0 <bam> <whitelist> \n
Explanation of parameter:

bam	- Input filtered tagged bam file from zUMIs
whitelist		- whitelist BC as kept_barcodes.txt from zUMIs
######################################################################################\n\n";
exit;
}

$bam=$ARGV[0];
$whitelist=$ARGV[1];
$out=$whitelist.".BCUMIstats.txt";

#chomp $bam;
#chomp $whitelist;

open BC, "<", $whitelist || die "Couldn't open file $whitelist. Check permissions!\n Check if the file exists\n\n";

%bchash=();
while(<BC>){
	@b = split(/\,/,$_);
	$bc = $b[0];
	$bchash{$bc} = 1;
#	print $bchash{$bc};
}
close BC;

open BAMF, "samtools view -@ 2 -x BX -x NH -x AS -x nM -x HI -x IH -x NM -x uT -x MD -x jM -x jI -x XN $bam | " || die "Couldn't open file $bam. Check permissions!\n Check if it is a bam file and it exists\n\n";

%bccount=();
while(<BAMF>){
#	print $_;
	@c = split(/\t/,$_);
	$bc = $c[11];
	$umi = $c[12];

	$bc =~ s/BC:Z://;
#	$umi =~ s/UB:Z://;

	if(exists $bchash{$bc}){
		if($umi =~ m/UB:Z:$/){
			$bccount{$bc}{"empty"}++;
		#	print "empty\n";
		}else{
			$bccount{$bc}{"umi"}++;
		#	print "umi\n";
		}
	}
}
close BAMF;

open OUT, ">", $out || die "Couldn't open file $out to write\n\n";
print OUT "XC\tnNontagged\tnUMItag\n";
foreach $key (keys %bccount){
	if( exists $bccount{$key}{"empty"}){
		$nempty = $bccount{$key}{"empty"};
	}
	else{
		$nempty = 0;
	}
	if( exists $bccount{$key}{"umi"}){
		$numi =  $bccount{$key}{"umi"};
	}else{
		$numi = 0;
	}
	print OUT $key,"\t",$nempty,"\t",$numi,"\n";
}
close OUT;

misc/countUMIfrags.py

0 → 100755
+56 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
import pysam
import argparse

def load_bcs(bcpath):
    with open(bcpath) as f:
        x = f.readline() # remove header
        y = f.readlines()
        bc = []
        for l in y:
            l = l.split(',')
            bc.append(l[0])
    return(bc)

def count_UMItags(inpath, bcs, threads, outpath):
    bccounts = {}
    for b in bcs:
        bccounts[b] = {}
        bccounts[b]['umi'] = 0
        bccounts[b]['int'] = 0

    inp = pysam.AlignmentFile(inpath, 'rb', threads = threads)
    for read in inp:
        bc = read.get_tag('BC')
        if bc in bcs:
            ub = read.get_tag('UB')
            if ub is '':
                bccounts[bc]['int'] += 1
            else:
                 bccounts[bc]['umi'] += 1

    inp.close()
    with open(outpath, 'w') as out:
        out.write('XC\tnNontagged\tnUMItag\n')
        for bc in bccounts:
            out.write(bc+'\t'+str(bccounts[bc]['int'])+'\t'+str(bccounts[bc]['umi'])+'\n')



def main():
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument('--bam', type=str, metavar='FILENAME',
                        help='Path to input BAM file')
    parser.add_argument('--p', type=int, default = 10,
                        help='Number of processes for bams')
    parser.add_argument('--bcs', type=str, metavar='FILENAME',
                        help='Path to kept barcodes')

    args = parser.parse_args()

    bcs = load_bcs(args.bcs)

    count_UMItags(inpath = args.bam, bcs = bcs, threads = args.p, outpath = args.bcs+".BCUMIstats.txt")

if __name__ == "__main__":
    main()
+2 −2
Original line number Diff line number Diff line
@@ -48,14 +48,14 @@ if(is.null(opt$read_layout)){
############### in case of smart3, check UMI fragment counts
if(any(grepl(pattern = "ATTGCGCAATG",x = unlist(opt$sequence_files)))){
  print("Counting UMI fragments...")
  script_filepath <- paste0(opt$zUMIs_directory,"/fqfilter_countUMI.pl")
  script_filepath <- paste0(opt$zUMIs_directory,"/misc/countUMIfrags.py")
  bam_filepath <- featfile
  if(opt$barcodes$BarcodeBinning > 0){
    bc_filepath <- paste0(opt$out_dir,"/zUMIs_output/",opt$project,"kept_barcodes_binned.txt")
  }else{
    bc_filepath <- paste0(opt$out_dir,"/zUMIs_output/",opt$project,"kept_barcodes.txt")
  }
  system(paste(script_filepath,bam_filepath,bc_filepath,"&"))
  system(paste(script_filepath,'--bam',bam_filepath,'--bcs',bc_filepath,'--p',opt$num_threads,"&"))
}

# GeneCounts --------------------------------------------------------------
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.9.4
vers=2.9.4b
currentv=$(curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/main/zUMIs.sh | grep '^vers=' | cut -f2 -d "=")
if [ "$currentv" != "$vers" ] ; then
    echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------";