approx ss3 pattern (e105a3ad) · Commits · github_fork / ZUMIs

README.md

+2 −0

Original line number	Diff line number	Diff line
		@@ -29,6 +29,8 @@ We provide a script to convert zUMIs output into loom file automatically based o
		zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

		## Changelog
		15 May 2020: zUMIs2.8.1: Smart-seq3 UMI pattern detection now takes one hamming distance error into account.

		02 May 2020: zUMIs2.8.0: Ensure correct behaviour when resuming from mapping (`which_stage: Mapping`). It is now possible to fractionally count reads overlapping multiple features (on the same strand if strand-specificity is used) `multi_overlap: yes`. New function for parsing the gene annotations: Fixed a problem with genes completely within other another gene's intron & added the possibility to extend genomic interval of exons (this is useful for crossmapping species with poor annotations). Added a probability calculation to compare the occurance of intronic reads to an expectation generated from background intergenic mapping reads (`intronProb: yes`). Added statistics output for the total number of reads observed per gene.

		22 Apr 2020: zUMIs2.7.3: zUMIs will try to parse a geneID to gene name mapping file from the user provided GTF annotations.

fqfilter_v2.pl

+30 −13

Original line number	Diff line number	Diff line
		@@ -21,7 +21,7 @@ $tmpPrefix=$ARGV[5];
		}
		use lib "$zumisdir";
		use distilReads;
		#use Approx;
		use Approx;

		open(YL,"$rscriptexc $zumisdir/readYaml4fqfilter.R $yml \|");
		@arg=<YL>;
		@@ -127,13 +127,13 @@ while(<$fh1>){
		# This block checks if smart-seq3 pattern is present and if it is found in the reads
		# If it is smart-seq3 pattern in the YAML file but not found in the read then the read is retained as full cDNA read where UMI is null.
		if($p2 eq "ATTGCGCAATG"){
		if($mcrseq !~ m/^$checkpattern/){
		$ss3 = "nopattern";
		$checkpattern = $mcrseq;
		# print "nopattern\n";
		}else{
		$a = substr($mcrseq,0,length($p2));
		if(Approx::amatch($checkpattern, [ 1 ],$a)){
		$ss3 = "yespattern";
		$checkpattern = $p2;
		}else{
		$ss3 = "nopattern";
		$checkpattern = $mcrseq;
		}
		}

		@@ -192,13 +192,13 @@ while(<$fh1>){
		# This block checks if smart-seq3 pattern is present and if it is found in the reads
		# If it is smart-seq3 pattern in the YAML file but not found in the read then the read is retained as full cDNA read where UMI is null.
		if($pf eq "ATTGCGCAATG"){
		if($mcrseq !~ m/^$checkpattern/){
		$ss3 = "nopattern";
		$checkpattern = $mcrseq;
		# print "nopattern\n";
		}else{
		$af = substr($mcrseq,0,length($pf));
		if(Approx::amatch($checkpattern, [ 1 ],$af)){
		$ss3 = "yespattern";
		$checkpattern = $pf;
		}else{
		$ss3 = "nopattern";
		$checkpattern = $mcrseq;
		}
		}

		@@ -257,11 +257,28 @@ while(<$fh1>){
		$btmp = grep {$_ < $bcthres[1]} @bquals;
		$mtmp = grep {$_ < $umithres[1]} @mquals;

		## Check if it is a smartseq3 pattern. If so, allow for approximate match with 1 base distance. If it is not a smartseq3 pattern, check if the read starts with the pattern at all. The $goahead variable decides if the read passes the quality threshold of a pattern.
		# IF the read should not have any pattern, the $checkpattern is equal to $mcrseq so $goahead variable will stay "yes"
		if($checkpattern eq "ATTGCGCAATG"){
		$ac = substr($mcrseq,0,length($checkpattern));
		if(Approx::amatch($checkpattern, [ 1 ],$ac)){
		$goahead = "yes";
		}else{
		$goahead = "no";
		}
		}else{
		if($mcrseq =~ m/^$checkpattern/){
		$goahead = "yes";
		}else{
		$goahead = "no";
		}
		}
		# print out only if above the quality threshold

		#if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0]) && ( Approx::amatch($checkpattern, [ 1 ],$mcrseq) ) && ($isPass ne "fail")){
		if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0]) && ($mcrseq =~ m/^$checkpattern/) && ($isPass ne "fail")){
		#if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0]) && ($mcrseq =~ m/^$checkpattern/) && ($isPass ne "fail")){
		#if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0])){

		if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0]) && ($goahead eq "yes") && ($isPass ne "fail")){
		chomp($rid);

		if($rid =~ m/^\@.*\s/){

zUMIs-master.sh

+1 −1

Original line number	Diff line number	Diff line
		@@ -3,7 +3,7 @@
		# Pipeline to run UMI-seq analysis from fastq to read count tables.
		# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
		# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
		vers=2.8.0
		vers=2.8.1
		currentv=`curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/master/zUMIs-master.sh \| grep '^vers=' \| cut -f2 -d "="`
		if [ "$currentv" != "$vers" ]; then echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------"; fi

Admin message