Commit d7393294 authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

refactor removal of adapters for clarity

parents ccf2872a ff51a6e2
Loading
Loading
Loading
Loading
+30 −16
Original line number Diff line number Diff line
@@ -1035,7 +1035,7 @@ for i in ${keys[@]}; do
done

#inverting R1 and R2 for specific technologies
if [[ "$technology" == "indrop-v2" ]] || [[ "$technology" == "indrop-v3" ]]; then
if [[ "$technology" == "indrop-v2" ]] || [[ "$technology" == "indrop-v3" ]] || [[ "$technology" == "splitseq" ]]; then
    #invert read1 and read2
    echo "***WARNING: technology is set to ${technology}. barcodes on Read 2 will be used***"
    tmp=$read1
@@ -1672,9 +1672,11 @@ else
    echo "  barcodes: ${barcodeadjust}bps at its head"
    echo "  UMIs: ${umiadjust}bps at its tail" 
    
    #for CEL-Seq2 swap barcode and UMI
    echo " making technology-specific modifications ..."
    #CEL-Seq2: swap barcode and UMI
    ## https://github.com/BUStools/bustools/issues/4
    if [[ "$technology" == "sciseq" ]]; then
    if [[ "$technology" == "celseq2" ]]; then
        echo "  ...barcode and UMI swapped for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #swap UMI and barcode
            sed -E '2~2s/(.{6})(.{6})/\2\1/' $convFile > ${crIN}/.temp
@@ -1682,10 +1684,13 @@ else
        done
    fi

    #remove adapter from inDrops see here for details:
    #inDrops: remove adapter (see links below for details)
    ## https://github.com/BUStools/bustools/issues/4
    ## https://teichlab.github.io/scg_lib_structs/methods_html/inDrop.html
    ## https://github.com/sdparekh/zUMIs/wiki/Protocol-specific-setup
    #note that adapters do not have to be removed for the dual-indexed inDrops-v3
    if [[ "$technology" == "indrop-v1" ]] || [[ "$technology" == "indrop-v2" ]]; then
        echo "  ...remove adapter for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove adapter if present
            sed -E '
@@ -1696,14 +1701,16 @@ else
                s/^(.{8}).{22}(.{8})/\1\2/g
                }' $convFile |
            #remove linker between barcode and UMI
            #NOTE: this sed is the final stage of the pipeline opened by the
            #previous sed (the line above ends in "|"), so no other command may
            #be placed between that "|" and this line.
            #On every even-numbered line (GNU sed "2~2") keep the first 16
            #characters, drop the next 4, and keep the 6 that follow.
            sed -E '2~2s/^(.{8})(.{8}).{4}(.{6})/\1\2\3/g' > "${crIN}/.temp"
            mv "${crIN}/.temp" "$convFile"
            #report only after the file has actually been replaced
            echo "  ...barcode and UMI linker removed for ${technology}"
        done
    fi
    
    #remove adapter from QuartzSeq
    #QuartzSeq: remove adapter
    if [[ "$technology" == "quartz-seq2-384" ]]; then
        #print the status message once, before looping over convFiles
        #(same placement as the other technology-specific sections)
        echo "  ...remove adapter for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove adapter if detected
            sed -E '
                /^TATAGAATTCGCGGCCGCTCGCGATAC(.{14})(.{8})/ {
@@ -1716,6 +1723,7 @@ else
        done
    fi
    if [[ "$technology" == "quartz-seq2-1536" ]]; then
        echo "  ...remove adapter for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove adapter if detected
            sed -E '
@@ -1729,9 +1737,9 @@ else
        done
    fi
    

    #remove adapter from Sci-Seq and swap barcode and UMI
    #Sci-Seq: remove adapter and swap barcode and UMI
    if [[ "$technology" == "sciseq" ]]; then
        echo "  ...remove adapter for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove adapter if detected
            sed -E '
@@ -1742,15 +1750,17 @@ else
                s/^(.{18})//g
                }'  $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile
            #swap UMI and barcode
            #swap barcode and UMI
            echo "  ...barcode and UMI swapped for ${technology}"
            sed -E '2~2s/(.{8})(.{10})/\2\1/' $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile            
        done
    fi
    
    #remove adapter from SureCell (and correct phase blocks)
    #SureCell: remove adapter and correct phase blocks
    ## https://github.com/Hoohm/dropSeqPipe/issues/42
    if [[ "$technology" == "surecell" ]]; then
        echo "  ...remove adapter and phase blocks for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove phase blocks and linkers
            sed -E '
@@ -1764,12 +1774,13 @@ else
        done
    fi

    # SPLiT-Seq can be set up similarly if a whitelist and 18bp barcode can be supported
    #SPLiT-Seq: correct phase blocks and swap barcode and UMI (if a whitelist and 18bp barcode can be supported)
    ## https://github.com/hms-dbmi/dropEst/issues/80
    ## https://github.com/sdparekh/zUMIs/wiki/Protocol-specific-setup
    if [[ "$technology" == "splitseq" ]]; then
        echo "  ...remove adapter and phase blocks for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove phase blocks and linkers (swap UMI and barcode)
            #remove phase blocks and linkers (swap barcode and UMI)
            sed -E '
                /^([ATCGA]{92})/ {
                s/^(.{10})(.{8}).{30}(.{8}).{30}(.{8})*/\2\3\4\1/g
@@ -1778,12 +1789,15 @@ else
                s/^(.{10})(.{8}).{30}(.{8}).{30}(.{8})*/\2\3\4\1/g
                }' $convFile > ${crIN}/.temp
            mv "${crIN}/.temp" "$convFile"
            #no separate swap step is needed here: the sed replacement above
            #emits \2\3\4\1, i.e. it moves the leading 10 characters (group 1)
            #behind the three 8-character groups
            echo "  ...barcode and UMI swapped for ${technology}"
        done
    fi

    #remove adapter from SCRB-Seq

    #SCRB-Seq: remove adapter
    ## https://teichlab.github.io/scg_lib_structs/methods_html/SCRB-seq.html
    if [[ "$technology" == "scrbseq" ]]; then
        echo "  ...remove adapter for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove adapters
            sed -E '