Commit 85db86af authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

support 5' chemistry with C1 CAGE with TSO conversion

parent 7d18236f
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
@@ -2694,6 +2694,31 @@ else
                ## 6 bp barcode, 10 bp UMI (TSO not handled yet)
                mv $crIN/mock_UMI.fastq ${convR1}
           fi
           
            # c1cage_tso_to_10x FASTQ BARCODE_LEN UMI_LEN TSO_SEQ TSO_QUAL TMPFILE
            #
            # Rewrites FASTQ in place so that, in every read where the barcode and
            # UMI are directly followed by 8 arbitrary bases and the literal
            # 'TATAGGG', those 15 bases are replaced by TSO_SEQ and the matching
            # 15 quality characters are replaced by TSO_QUAL.
            #
            # Sequence and quality are handled together per 4-line record, so a
            # read that does not carry the TSO at the expected offset is left
            # completely untouched and sequence/quality lengths always agree.
            # The TSO is only recognised at the fixed offset BARCODE_LEN + UMI_LEN,
            # never further into the read.
            #
            # TMPFILE receives the converted reads and replaces FASTQ only when
            # the conversion succeeded; on failure FASTQ is left as it was.
            # Returns 0 on success, 1 on failure.
            #
            # Defined next to its only call site; re-defining it each time this
            # code runs is harmless.
            c1cage_tso_to_10x() {
                local fastq=$1 bclen=$2 umilen=$3 tso_seq=$4 tso_qual=$5 tmp=$6
                if awk -v bl="$bclen" -v ul="$umilen" -v tso="$tso_seq" -v tsoq="$tso_qual" '
                    BEGIN { pre = bl + ul }
                    # sequence line: remember whether this record carries the TSO
                    NR % 4 == 2 {
                        hit = (substr($0, pre + 9, 7) == "TATAGGG")
                        if (hit) {
                            print substr($0, 1, pre) tso substr($0, pre + 16)
                            next
                        }
                    }
                    # quality line: only rewritten when the sequence was rewritten
                    NR % 4 == 0 && hit {
                        print substr($0, 1, pre) tsoq substr($0, pre + 16)
                        next
                    }
                    { print }
                ' "$fastq" > "$tmp"; then
                    mv -- "$tmp" "$fastq"
                else
                    rm -f -- "$tmp"
                    return 1
                fi
            }

            if [[ "$technology" == "c1-cage" ]]; then
                #convert TSO to expected length for 10x 5' (TSS in R1 from base 39)
                echo " handling $convFile ..."
                # 13-character replacement TSO and the quality string written for it
                tsoS="TTTCTTATATGGG"
                tsoQ="IIIIIIIIIIIII"
                # NOTE(review): chemistry is presumably consumed later in the script - confirm
                chemistry="SC5P-PE"
                if [[ $verbose ]]; then
                    echo "technology $technology"
                    echo "barcode: $barcodelength"
                    echo "umi: $umilength"
                    echo "replacing <8 bases>TATAGGG after barcode and UMI with $tsoS (quality $tsoQ) in $convFile"
                fi
                # Replace 'NNNNNNNNTATAGGG' (15 bases) with the 13-base TSO, together
                # with the corresponding quality characters, in a single pass.
                c1cage_tso_to_10x "$convFile" "$barcodelength" "$umilength" "$tsoS" "$tsoQ" "${crIN}/.temp" \
                    || echo "Error: TSO conversion failed for $convFile (file left unchanged)" >&2
            fi
            echo "  ${convFile} adjusted"
        done
    fi