Commit 88591108 authored by TomKellyGenetics
Browse files

replace UMI with mock (hardcoded for converted barcode length)

parent 746c46bd
Loading
Loading
Loading
Loading
+57 −42
Original line number Original line Diff line number Diff line
@@ -1657,7 +1657,7 @@ if [[ -n "$barcodefile" ]]; then
        barcodefile=$(readlink -f $barcodefile)
        barcodefile=$(readlink -f $barcodefile)
        custombarcodes=true
        custombarcodes=true
        #allowing WellList from ICELL8 and other well-based techniques
        #allowing WellList from ICELL8 and other well-based techniques
        if [[ "$technology" == "bd-rhapsody" ]] || [[ "$technology" == "icell8" ]] || [[ "$technology" == "quartz-seq" ]] || [[ "$technology" == "ramda-seq" ]] [[ "$technology" == "quartz-seq2*" ]] || [[ "$technology" == "microwellseq" ]] || [[ "$technology" == "smartseq*" ]] || [[ "$technology" == "seqwell" ]] || [[ "$technology" == "sciseq2" ]] || [[ "$technology" == "sciseq3" ]] || [[ "$technology" == "scifiseq" ]] || [[ "$technology" == "splitseq" ]] || [[ "$technology" == "splitseq2" ]] || [[ "$technology" == "custom" ]]; then
        if [[ "$technology" == "bd-rhapsody" ]] || [[ "$technology" == "icell8" ]] || [[ "$technology" == "quartz-seq" ]] || [[ "$technology" == "ramda-seq" ]] || [[ "$technology" == "quartz-seq2*" ]] || [[ "$technology" == "microwellseq" ]] || [[ "$technology" == "smartseq*" ]] || [[ "$technology" == "seqwell" ]] || [[ "$technology" == "sciseq2" ]] || [[ "$technology" == "sciseq3" ]] || [[ "$technology" == "scifiseq" ]] || [[ "$technology" == "splitseq" ]] || [[ "$technology" == "splitseq2" ]] || [[ "$technology" == "custom" ]]; then
            seg=$'\t'
            seg=$'\t'
            n_col=$(awk -F'\t' '{print NF}' $barcodefile | sort -nu | tail -n 1)
            n_col=$(awk -F'\t' '{print NF}' $barcodefile | sort -nu | tail -n 1)
            if [[ $n_col -eq 1 ]]; then
            if [[ $n_col -eq 1 ]]; then
@@ -2627,7 +2627,7 @@ else
                perl sub/AddMockUMI.pl --fastq=${convR1} --out_dir $crIN --head_length=$barcodelength --umi_length=$umi_default
                perl sub/AddMockUMI.pl --fastq=${convR1} --out_dir $crIN --head_length=$barcodelength --umi_length=$umi_default
                umilength=$umi_default
                umilength=$umi_default
                umiadjust=0
                umiadjust=0
                if [[ $chemistry == "SC3Pv3"]; then
                if [[ $chemistry == "SC3Pv3" ]]; then
                    chemistry="SC3Pv2"
                    chemistry="SC3Pv2"
                fi
                fi
                
                
@@ -2761,7 +2761,7 @@ else
                perl sub/AddMockUMI.pl --fastq=${convR1} --out_dir $crIN --head_length=$barcodelength --umi_length=$umi_default
                perl sub/AddMockUMI.pl --fastq=${convR1} --out_dir $crIN --head_length=$barcodelength --umi_length=$umi_default
                umilength=$umi_default
                umilength=$umi_default
                umiadjust=0
                umiadjust=0
                if [[ $chemistry == "SC3Pv3"]; then
                if [[ $chemistry == "SC3Pv3" ]]; then
                    chemistry="SC3Pv2"
                    chemistry="SC3Pv2"
                fi
                fi
                #returns a combined R1 file with barcode and mock UMI
                #returns a combined R1 file with barcode and mock UMI
@@ -3215,6 +3215,26 @@ else
        fi
        fi
    fi
    fi
    
    
    #convert UMI
    echo " adjusting UMIs of R1 files"
    # check if original UMI is shorter than default
    if [[ 0 -gt $umiadjust ]]; then
        for convFile in "${convFiles[@]}"; do
            echo " handling $convFile ..."
            toS=`printf '%0.sA' $(seq 1 $(($umiadjust * -1)))`
            toQ=`printf '%0.sI' $(seq 1 $(($umiadjust * -1)))`
            #compute length of adjusted barcode + original UMI
            keeplength=`echo $((${barcode_default}+${umi_default}-($umiadjust * -1)))`
            #Add n characters to the end of the sequence
            sed -E "2~4s/(.{$keeplength})(.*)/\1$toS\2/"  $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile
            #Add n characters to the end of the quality
            sed -E "4~4s/(.{$keeplength})(.*)/\1$toQ\2/"  $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile
            echo "  ${convFile} adjusted"
        done
    fi
    
    #replace UMI with mock UMI to count reads (for technologies not already containing mock UMI)
    #replace UMI with mock UMI to count reads (for technologies not already containing mock UMI)
    if [[ $technology != "icell8" ]] && [[ $technology != "ramda-seq" ]] && [[ $technology != "quartz-seq" ]] && [[ $technology != "smartseq" ]] && [[ $technology != "smartseq2" ]] && [[ $technology != "strt-seq" ]]; then
    if [[ $technology != "icell8" ]] && [[ $technology != "ramda-seq" ]] && [[ $technology != "quartz-seq" ]] && [[ $technology != "smartseq" ]] && [[ $technology != "smartseq2" ]] && [[ $technology != "strt-seq" ]]; then
        if [[ $nonUMI ]]; then
        if [[ $nonUMI ]]; then
@@ -3222,56 +3242,51 @@ else
            echo "NOTICE: results will result read counts not UMI"
            echo "NOTICE: results will result read counts not UMI"
            echo "## this behaviour is not recommended unless integrating with non-UMI data ##"
            echo "## this behaviour is not recommended unless integrating with non-UMI data ##"
             
             
            for convFile in "${convFiles[@]}"; do
                convR1=$convFile
                #remove inflated umi (to replace with mock and count as reads)
                #remove inflated umi (to replace with mock and count as reads)
             cmd=$(echo 'sed -E "
                sed -E "
                             /^(.{'$barcodelength'})(.{'${umilength}'})(.*)/ {
                    /^(.{16})(.{10})(.*)/ {
                            s/^(.{'$barcodelength'})(.{'${umilength}'})(.*)/\1\3/g
                    s/^(.{16})(.{10})(.*)/\1\3/g
                    n
                    n
                    n
                    n
                            s/^(.{'$barcodelength'})(.{'${umilength}'})(.*)/\1\3/g
                    s/^(.{16})(.{10})(.*)/\1\3/g
                            }" $convFile > ${crIN}/.temp
                }" $convFile > ${crIN}/.temp\n
                         mv ${crIN}/.temp $convFile')
                mv ${crIN}/.temp $convFile'
                if [[ $chemistry == "SC3Pv3" ]]; then
                    chemistry="SC3Pv2"
                fi
                #cmd=$(echo 'sed -E "
                #                /^(.{'$barcodelength'})(.{'${umilength}'})(.*)/ {
                #               s/^(.{'$barcodelength'})(.{'${umilength}'})(.*)/\1\3/g
                #               n
                #               n
                #               s/^(.{'$barcodelength'})(.{'${umilength}'})(.*)/\1\3/g
                #               }" $convFile > ${crIN}/.temp\n
                #            mv ${crIN}/.temp $convFile')
                if [[ $verbose ]]; then
                if [[ $verbose ]]; then
                    echo technology $technology
                    echo technology $technology
                    echo barcode: $barcodelength
                    echo barcode: $barcodelength
                     echo umi: $umilength
                     echo umi: $umilength
                 echo $cmd
                #    echo $cmd
                fi
                fi
             eval $cmd
                #eval $cmd
                
                
                # add mock UMI (count reads instead of UMI) barcodelength=16, umi_default=10
                # add mock UMI (count reads instead of UMI) barcodelength=16, umi_default=10
                perl sub/AddMockUMI.pl --fastq=${convR1} --out_dir $crIN --head_length=$barcodelength --umi_length=$umi_default
                perl sub/AddMockUMI.pl --fastq=${convR1} --out_dir $crIN --head_length=$barcodelength --umi_length=$umi_default
                umilength=$umi_default
                umilength=$umi_default
                umiadjust=0
                umiadjust=0
             if [[ $chemistry == "SC3Pv3"]; then
                if [[ $chemistry == "SC3Pv3" ]]; then
                    chemistry="SC3Pv2"
                    chemistry="SC3Pv2"
                fi
                fi
                
                
                #returns a combined R1 file with barcode and mock UMI
                #returns a combined R1 file with barcode and mock UMI
                ## barcode, 10 bp UMI, followed by TSO (if applicable)
                ## barcode, 10 bp UMI, followed by TSO (if applicable)
                mv $crIN/mock_UMI.fastq ${convR1}
                mv $crIN/mock_UMI.fastq ${convR1}
            done
        fi
        fi
    fi
    fi
    
    
    #UMI
    echo " adjusting UMIs of R1 files"
    # check if original UMI is shorter than default
    if [[ 0 -gt $umiadjust ]]; then
        for convFile in "${convFiles[@]}"; do
            echo " handling $convFile ..."
            toS=`printf '%0.sA' $(seq 1 $(($umiadjust * -1)))`
            toQ=`printf '%0.sI' $(seq 1 $(($umiadjust * -1)))`
            #compute length of adjusted barcode + original UMI
            keeplength=`echo $((${barcode_default}+${umi_default}-($umiadjust * -1)))`
            #Add n characters to the end of the sequence
            sed -E "2~4s/(.{$keeplength})(.*)/\1$toS\2/"  $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile
            #Add n characters to the end of the quality
            sed -E "4~4s/(.{$keeplength})(.*)/\1$toQ\2/"  $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile
            echo "  ${convFile} adjusted"
        done
    fi
    # check if original UMI is longer than default
    # check if original UMI is longer than default
    if [[ 0 -lt $umiadjust ]]; then
    if [[ 0 -lt $umiadjust ]]; then
        for convFile in "${convFiles[@]}"; do
        for convFile in "${convFiles[@]}"; do