Commit e105a3ad authored by cziegenhain's avatar cziegenhain
Browse files

approx ss3 pattern

parent ddff1a62
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
15 May 2020: zUMIs2.8.1: Smart-seq3 UMI pattern detection now takes one hamming distance error into account.

02 May 2020: zUMIs2.8.0: Ensure correct behaviour when resuming from mapping (`which_stage: Mapping`). It is now possible to fractionally count reads overlapping multiple features (on the same strand if strand-specificity is used) `multi_overlap: yes`. New function for parsing the gene annotations: Fixed a problem with genes completely within other another gene's intron & added the possibility to extend genomic interval of exons (this is useful for crossmapping species with poor annotations). Added a probability calculation to compare the occurance of intronic reads to an expectation generated from background intergenic mapping reads (`intronProb: yes`). Added statistics output for the total number of reads observed per gene.

22 Apr 2020: zUMIs2.7.3: zUMIs will try to parse a geneID to gene name mapping file from the user provided GTF annotations.
+30 −13
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ $tmpPrefix=$ARGV[5];
}
use lib "$zumisdir";
use distilReads;
#use Approx;
use Approx;

open(YL,"$rscriptexc $zumisdir/readYaml4fqfilter.R $yml |");
@arg=<YL>;
@@ -127,13 +127,13 @@ while(<$fh1>){
  # This block checks if smart-seq3 pattern is present and if it is found in the reads
  # If it is smart-seq3 pattern in the YAML file but not found in the read then the read is retained as full cDNA read where UMI is null.
  if($p2 eq "ATTGCGCAATG"){
    if($mcrseq !~ m/^$checkpattern/){
    $ss3 = "nopattern";
    $checkpattern = $mcrseq;
#    print "nopattern\n";
    }else{
    $a = substr($mcrseq,0,length($p2));
    if(Approx::amatch($checkpattern, [ 1 ],$a)){
      $ss3 = "yespattern";
      $checkpattern = $p2;
    }else{
      $ss3 = "nopattern";
      $checkpattern = $mcrseq;
    }
  }

@@ -192,13 +192,13 @@ while(<$fh1>){
      # This block checks if smart-seq3 pattern is present and if it is found in the reads
      # If it is smart-seq3 pattern in the YAML file but not found in the read then the read is retained as full cDNA read where UMI is null.
      if($pf eq "ATTGCGCAATG"){
        if($mcrseq !~ m/^$checkpattern/){
          $ss3 = "nopattern";
          $checkpattern = $mcrseq;
        #  print "nopattern\n";
        }else{
        $af = substr($mcrseq,0,length($pf));
        if(Approx::amatch($checkpattern, [ 1 ],$af)){
          $ss3 = "yespattern";
          $checkpattern = $pf;
        }else{
          $ss3 = "nopattern";
          $checkpattern = $mcrseq;
        }
      }

@@ -257,11 +257,28 @@ while(<$fh1>){
    $btmp = grep {$_ < $bcthres[1]} @bquals;
    $mtmp = grep {$_ < $umithres[1]} @mquals;

## Check if it is a smartseq3 pattern. If so, allow for approximate match with 1 base distance. If it is not a smartseq3 pattern, check if the read starts with the pattern at all. The $goahead variable decides if the read passes the quality threshold of a pattern.
# IF the read should not have any pattern, the $checkpattern is equal to $mcrseq so $goahead variable will stay "yes"
    if($checkpattern eq "ATTGCGCAATG"){
      $ac = substr($mcrseq,0,length($checkpattern));
      if(Approx::amatch($checkpattern, [ 1 ],$ac)){
        $goahead = "yes";
      }else{
        $goahead = "no";
      }
    }else{
      if($mcrseq =~ m/^$checkpattern/){
        $goahead = "yes";
      }else{
        $goahead = "no";
      }
    }
    # print out only if above the quality threshold

     #if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0]) && ( Approx::amatch($checkpattern, [ 1 ],$mcrseq) ) && ($isPass ne "fail")){
     if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0]) && ($mcrseq =~ m/^$checkpattern/) && ($isPass ne "fail")){
     #if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0]) && ($mcrseq =~ m/^$checkpattern/) && ($isPass ne "fail")){
     #if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0])){

     if(($btmp < $bcthres[0]) && ($mtmp < $umithres[0]) && ($goahead eq "yes") && ($isPass ne "fail")){
      chomp($rid);

      if($rid =~ m/^\@.*\s/){
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.8.0
vers=2.8.1
currentv=`curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/master/zUMIs-master.sh | grep '^vers=' | cut -f2 -d "="`
if [ "$currentv" != "$vers" ]; then echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------"; fi