Commit c92d62fc authored by Christoph's avatar Christoph
Browse files

SS3 UMI pattern MM

parent 00ca1b7c
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
18 Feb 2021: zUMIs2.9.5: The number of mismatches allowed when detecting the Smart-seq3 UMI-read pattern is now user-settable in the YAML file as follows: `find_pattern: ATTGCGCAATG;2` would allow 2 mismatches. Usage as in prior versions (i.e. `find_pattern: ATTGCGCAATG`) will default to the previous value of 1 allowed mismatch.

18 Sept 2020 - 29 Nov 2020: zUMIs.2.9.4b/c/d/e/f: Fix & speed up Smart-seq3 UMI read counting. Prevent crash when a chunk of cell BCs does not match any downsampling. Speed up barcode detection steps for some cases. Prevent too much CPU usage in UMI error correction. Take correct samtools executable in gene annotation parsing. Prevent crash in BC error correction for huge datasets.

12 Sept 2020: [zUMIs2.9.4](https://github.com/sdparekh/zUMIs/releases/tag/2.9.4): Significantly speed up writing of error-corrected UMI tags to the bam file. Prevent potential crash when no cells meet any user-defined downsampling criteria.
+21 −7
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ $UMIfilter = distilReads::argClean($argHash{"UMIfilter"});
$pattern = distilReads::argClean($argHash{"find_pattern"});
$frameshift = distilReads::argClean($argHash{"correct_frameshift"});


#print($pattern);
#demult_HEK_r1.fq.gz; demult_HEK_r2.fq.gz;ACTGCTGTA
#if find_pattern exists, readYaml4fqfilter returns  "ATTGCGCAATG character(0) character(0)"

@@ -122,7 +122,6 @@ while(<$fh1>){
  $p3 = $fp1[3];
  $ss3 = "yespattern";

#$flag = 0;
  #This block checks if the read should have a certain pattern
  if($p2 =~ /^character/){
    $mcrseq = $rseq;
@@ -130,6 +129,14 @@ while(<$fh1>){
  }
  else{
    $mcrseq = $rseq;
    if($p2 =~ /;/){
      @tmpsplit = split(";",$p2);
      $p2 = $tmpsplit[0];
      $mm = int($tmpsplit[1]);
    }
    else{
      $mm = 1;
    }
    $checkpattern = $p2;
  }

@@ -137,7 +144,7 @@ while(<$fh1>){
  # If the Smart-seq3 pattern is set in the YAML file but not found in the read, the read is retained as a full cDNA read where the UMI is null.
  if($p2 eq "ATTGCGCAATG"){
    $a = substr($mcrseq,0,length($p2));
    if(Approx::amatch($checkpattern, [ 1 ],$a)){
    if(Approx::amatch($checkpattern, [ $mm ],$a)){
      $ss3 = "yespattern";
      $checkpattern = $p2;
    }else{
@@ -147,7 +154,6 @@ while(<$fh1>){
  }



#This block checks if the read should be corrected for a frameshift in the BC pattern
  if($p3 !~ /^character/){
    @bla = split($p3,$rseq);
@@ -195,6 +201,14 @@ while(<$fh1>){
      }
      else{
        $mcrseq = $rseq1;
        if($pf =~ /;/){
          @tmpsplit = split(";",$pf);
          $pf = $tmpsplit[0];
          $mm = $tmpsplit[1];
        }
        else{
          $mm = '1';
        }
        $checkpattern = $pf;
      }

@@ -202,7 +216,7 @@ while(<$fh1>){
      # If the Smart-seq3 pattern is set in the YAML file but not found in the read, the read is retained as a full cDNA read where the UMI is null.
      if($pf eq "ATTGCGCAATG"){
        $af = substr($mcrseq,0,length($pf));
        if(Approx::amatch($checkpattern, [ 1 ],$af)){
        if(Approx::amatch($checkpattern, [ $mm ],$af)){
          $ss3 = "yespattern";
          $checkpattern = $pf;
        }else{
@@ -270,7 +284,7 @@ while(<$fh1>){
# IF the read should not have any pattern, the $checkpattern is equal to $mcrseq so $goahead variable will stay "yes"
    if($checkpattern eq "ATTGCGCAATG"){
      $ac = substr($mcrseq,0,length($checkpattern));
      if(Approx::amatch($checkpattern, [ 1 ],$ac)){
      if(Approx::amatch($checkpattern, [ $mm ],$ac)){
        $goahead = "yes";
      }else{
        $goahead = "no";
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
# Pipeline to run UMI-seq analysis from fastq to read count tables.
# Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann
# Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se
vers=2.9.4h
vers=2.9.5
currentv=$(curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/main/zUMIs.sh | grep '^vers=' | cut -f2 -d "=")
if [ "$currentv" != "$vers" ] ; then
    echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------";