Commit d0bb5644 authored by TomKellyGenetics
Browse files

Replace the UMI with a mock UMI; the sed command is now built as a string and run with eval, which removes the hard-coded barcode and UMI lengths

parent 3c5e1ae5
Loading
Loading
Loading
Loading
+29 −44
Original line number Original line Diff line number Diff line
@@ -3215,26 +3215,6 @@ else
        fi
        fi
    fi
    fi
    
    
    #convert UMI
    echo " adjusting UMIs of R1 files"
    # check if original UMI is shorter than default
    # A negative umiadjust selects this branch; its magnitude is the number of
    # filler characters inserted into every read.
    if [[ 0 -gt $umiadjust ]]; then
        # The filler strings and the insertion offset are identical for every
        # file, so they are computed once before the loop: padlength copies of
        # "A" for sequence lines and of "I" for quality lines.
        padlength=$(( -umiadjust ))
        printf -v toS '%*s' "$padlength" ''
        toQ=${toS// /I}
        toS=${toS// /A}
        #compute length of adjusted barcode + original UMI
        keeplength=$(( barcode_default + umi_default - padlength ))
        for convFile in "${convFiles[@]}"; do
            echo " handling $convFile ..."
            # Pad sequence lines (2~4) and quality lines (4~4) in a single pass;
            # the filler is inserted right after the first $keeplength characters.
            # "first~step" addresses are a GNU sed extension.
            # The input is replaced only after sed succeeded, so a failed run
            # cannot overwrite the FASTQ with a truncated temporary file.
            if sed -E \
                -e "2~4s/(.{$keeplength})(.*)/\1$toS\2/" \
                -e "4~4s/(.{$keeplength})(.*)/\1$toQ\2/" \
                -- "$convFile" > "${crIN}/.temp" \
                && mv -- "${crIN}/.temp" "$convFile"; then
                echo "  ${convFile} adjusted"
            else
                echo "  ERROR: failed to adjust UMIs in ${convFile}" >&2
                exit 1
            fi
        done
    fi
    
    #replace UMI with mock UMI to count reads (for technologies not already containing mock UMI)
    #replace UMI with mock UMI to count reads (for technologies not already containing mock UMI)
    if [[ $technology != "icell8" ]] && [[ $technology != "ramda-seq" ]] && [[ $technology != "quartz-seq" ]] && [[ $technology != "smartseq" ]] && [[ $technology != "smartseq2" ]] && [[ $technology != "strt-seq" ]]; then
    if [[ $technology != "icell8" ]] && [[ $technology != "ramda-seq" ]] && [[ $technology != "quartz-seq" ]] && [[ $technology != "smartseq" ]] && [[ $technology != "smartseq2" ]] && [[ $technology != "strt-seq" ]]; then
        if [[ $nonUMI ]]; then
        if [[ $nonUMI ]]; then
@@ -3244,35 +3224,20 @@ else
             
             
            for convFile in "${convFiles[@]}"; do
            for convFile in "${convFiles[@]}"; do
                convR1=$convFile
                convR1=$convFile
                #remove inflated umi (to replace with mock and count as reads)
                #remove inflated umi (to replace with mock and count as reads) by non-standard evaluation to depend on variable umi-length
                sed -E "
                # Build the sed command as a string so the barcode and UMI lengths come
                # from variables; it is echoed in verbose mode and then run with eval.
                # The regex metacharacters are written unescaped: inside the evaluated
                # double-quoted sed script, "\^" and "\*" would reach sed as literal
                # characters and the pattern would never match a read.
                # The same cut is applied to sequence lines (2~4) and quality lines (4~4)
                # so both keep the same length (GNU sed "first~step" addresses).
                # NOTE(review): assumes AddMockUMI.pl expects the UMI removed from the
                # quality string as well - confirm against that script.
                # %q shell-quotes the two paths so they survive the later eval.
                printf -v cmd 'sed -E -e "2~4s/^(.{%s})(.{%s})(.*)$/\\1\\3/" -e "4~4s/^(.{%s})(.{%s})(.*)$/\\1\\3/" %q > %q' \
                    "$barcodelength" "$umilength" "$barcodelength" "$umilength" \
                    "$convFile" "${crIN}/.temp"
                    /^(.{16})(.{10})(.*)/ {
                    s/^(.{16})(.{10})(.*)/\1\3/g
                    n
                    n
                    s/^(.{16})(.{10})(.*)/\1\3/g
                }" $convFile > ${crIN}/.temp\n
                mv ${crIN}/.temp $convFile'
                if [[ $chemistry == "SC3Pv3" ]]; then
                    chemistry="SC3Pv2"
                fi
                #cmd=$(echo 'sed -E "
                #                /^(.{'$barcodelength'})(.{'${umilength}'})(.*)/ {
                #               s/^(.{'$barcodelength'})(.{'${umilength}'})(.*)/\1\3/g
                #               n
                #               n
                #               s/^(.{'$barcodelength'})(.{'${umilength}'})(.*)/\1\3/g
                #               }" $convFile > ${crIN}/.temp\n
                #            mv ${crIN}/.temp $convFile')
                if [[ $verbose ]]; then
                if [[ $verbose ]]; then
                    echo technology $technology
                    echo technology $technology
                    echo barcode: $barcodelength
                    echo barcode: $barcodelength
                    echo umi: $umilength
                    echo umi: $umilength
                #    echo $cmd
                    echo $cmd
                fi
                fi
                #eval $cmd
                eval $cmd
                mv ${crIN}/.temp $convFile
                
                
                # add mock UMI (count reads instead of UMI) barcodelength=16, umi_default=10
                if [[ $chemistry == "SC3Pv3" ]]; then
                     chemistry="SC3Pv2"
                fi
                perl sub/AddMockUMI.pl --fastq=${convR1} --out_dir $crIN --head_length=$barcodelength --umi_length=$umi_default
                perl sub/AddMockUMI.pl --fastq=${convR1} --out_dir $crIN --head_length=$barcodelength --umi_length=$umi_default
                umilength=$umi_default
                umilength=$umi_default
                umiadjust=0
                umiadjust=0
@@ -3287,6 +3252,26 @@ else
        fi
        fi
    fi
    fi
    
    
    #convert UMI
    echo " adjusting UMIs of R1 files"
    # check if original UMI is shorter than default
    # A negative umiadjust selects this branch; its magnitude is the number of
    # filler characters inserted into every read.
    if [[ 0 -gt $umiadjust ]]; then
        # The filler strings and the insertion offset are identical for every
        # file, so they are computed once before the loop: padlength copies of
        # "A" for sequence lines and of "I" for quality lines.
        padlength=$(( -umiadjust ))
        printf -v toS '%*s' "$padlength" ''
        toQ=${toS// /I}
        toS=${toS// /A}
        #compute length of adjusted barcode + original UMI
        keeplength=$(( barcode_default + umi_default - padlength ))
        for convFile in "${convFiles[@]}"; do
            echo " handling $convFile ..."
            # Pad sequence lines (2~4) and quality lines (4~4) in a single pass;
            # the filler is inserted right after the first $keeplength characters.
            # "first~step" addresses are a GNU sed extension.
            # The input is replaced only after sed succeeded, so a failed run
            # cannot overwrite the FASTQ with a truncated temporary file.
            if sed -E \
                -e "2~4s/(.{$keeplength})(.*)/\1$toS\2/" \
                -e "4~4s/(.{$keeplength})(.*)/\1$toQ\2/" \
                -- "$convFile" > "${crIN}/.temp" \
                && mv -- "${crIN}/.temp" "$convFile"; then
                echo "  ${convFile} adjusted"
            else
                echo "  ERROR: failed to adjust UMIs in ${convFile}" >&2
                exit 1
            fi
        done
    fi
    
    # check if original UMI is longer than default
    # check if original UMI is longer than default
    if [[ 0 -lt $umiadjust ]]; then
    if [[ 0 -lt $umiadjust ]]; then
        for convFile in "${convFiles[@]}"; do
        for convFile in "${convFiles[@]}"; do