Commit 129b4bda authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

add barcode whitelists and adapter trimming for BD Rhapsody

parent 91d08672
Loading
Loading
Loading
Loading
+24 −2
Original line number Diff line number Diff line
@@ -1657,7 +1657,13 @@ else
        if [[ $verbose ]]; then
            echo "  generating a new barcode whitelist for ${technology}"
        fi
        if [[ "$technology" == "indrop-v"* ]]; then
        if [[ "$technology" == "bd-rhapsody" ]]; then
             if [[ ! -f "${whitelistdir}/bd_rhapsody_barcode.txt" ]]; then
                 #generates all combinations of the three cell label sections (section1 + section2 + section3)
                 # join on a field that does not exist (9999): every line has the same (empty) key,
                 # so join emits the full cross product; sed then strips the separating spaces
                 join -j 9999 "${whitelistdir}/bd_rhapsody_cell_label_section1.txt" "${whitelistdir}/bd_rhapsody_cell_label_section2.txt" | sed "s/ //g" | \
                 join -j 9999 - "${whitelistdir}/bd_rhapsody_cell_label_section3.txt" | sed "s/ //g"  > "${whitelistdir}/bd_rhapsody_barcode.txt.tmp"
                 # publish only a non-empty result: an empty whitelist left behind by a failed
                 # join would satisfy the -f guard above and never be regenerated
                 if [[ -s "${whitelistdir}/bd_rhapsody_barcode.txt.tmp" ]]; then
                     mv "${whitelistdir}/bd_rhapsody_barcode.txt.tmp" "${whitelistdir}/bd_rhapsody_barcode.txt"
                 else
                     rm -f "${whitelistdir}/bd_rhapsody_barcode.txt.tmp"
                     echo "  error: could not generate barcode whitelist ${whitelistdir}/bd_rhapsody_barcode.txt" >&2
                 fi
             fi
        elif [[ "$technology" == "indrop-v"* ]]; then
            if [[ "$technology" == "indrop-v1" ]] || [[ $technology"" == "indrop-v2" ]]; then
                 perl ${MAKEINDROPBARCODES} ${whitelistdir}/inDrop_gel_barcode1_list.txt ${whitelistdir}/inDrop_gel_barcode2_list.txt v2 ${whitelistdir}
            elif [[ "$technology" == "indrop-v3" ]]; then
@@ -2392,6 +2398,22 @@ else
        done
    fi
    
    #BD Rhapsody: remove adapters
    # Read layout handled here (widths taken from the linker pattern below):
    #   [prefix] label1(9) ACTGGCCTGCGA(12) label2(9) GGTAGCGGTGACA(13) label3(9) + 8 bases (presumably the UMI) [tail]
    # Reads carrying both linkers are rewritten to label1+label2+label3+remaining bases.
    # The quality line is cut at exactly the same columns as the sequence line so the
    # two always keep the same length; reads without the linkers pass through unchanged.
    # Assumes 4-line FASTQ records (header, sequence, "+", quality) in each file.
    if [[ "$technology" == "bd-rhapsody" ]]; then
        echo "  ...remove adapter and phase blocks for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove phase blocks and linkers
            awk -v linker1="ACTGGCCTGCGA" -v linker2="GGTAGCGGTGACA" '
                # Return the 1-based column where label1 starts, or 0 when the read does
                # not contain label1 linker1 label2 linker2 label3 followed by 8 bases.
                # The last valid hit wins, which is what a greedy leading ".*" would select.
                function label_start(seq,    offset, rel, pos, hit) {
                    hit = 0
                    offset = 0
                    while ((rel = index(substr(seq, offset + 1), linker1)) > 0) {
                        pos = offset + rel
                        # 9 bases before linker1, linker2 exactly 9 bases after linker1,
                        # then 9 + 8 more bases (the last one sits at column pos + 50)
                        if (pos > 9 && substr(seq, pos + 21, 13) == linker2 && length(seq) >= pos + 50) {
                            hit = pos - 9
                        }
                        offset = pos
                    }
                    return hit
                }
                # Drop the prefix and both linker columns, keep everything else.
                function keep_labels(line, start) {
                    return substr(line, start, 9) substr(line, start + 21, 9) substr(line, start + 43)
                }
                # sequence line: locate the labels and remember the column for the quality line
                NR % 4 == 2 {
                    start = label_start($0)
                    if (start) $0 = keep_labels($0, start)
                }
                # quality line: cut the same columns as the sequence, then reset for the next record
                NR % 4 == 0 {
                    if (start) $0 = keep_labels($0, start)
                    start = 0
                }
                { print }
                ' "$convFile" > "${crIN}/.temp"
            mv "${crIN}/.temp" "$convFile"
        done
    fi
    
    #inDrops: remove adapter (see links below for details)
    ## https://github.com/BUStools/bustools/issues/4
    ## https://teichlab.github.io/scg_lib_structs/methods_html/inDrop.html
@@ -2634,7 +2656,7 @@ else
            #remove phase blocks and linkers
            sed -E '
                /.*(.{6})TAGCCATCGCATTGC(.{6})TACCTCTGAGCTGAA(.{6})ACG(.{8})GAC/ {
                s/.*(.{6})TAGCCATCGCATTGC(.{6})TACCTCTGAGCTGAA(.{6})ACG(.{8})GAC.*/\1\2\3\4/g
                s/.*(.{6})TAGCCATCGCATTGC(.{6})TACCTCTGAGCTGAA(.{6})ACG(.{8})GAC/\1\2\3\4/g
                n
                n
                s/.*(.{6}).{15}(.{6}).{15}(.{6}).{3}(.{8}).{3}/\1\2\3\4/g
+97 −0
Original line number Diff line number Diff line
GTCGCTATA
CTTGTACTA
CTTCACATA
ACACGCCGG
CGGTCCAGG
AATCGAATG
CCTAGTATA
ATTGGCTAA
AAGACATGC
AAGGCGATC
GTGTCCTTA
GGATTAGGA
ATGGATCCA
ACATAAGCG
AACTGTATT
ACCTTGCGG
CAGGTGTAG
AGGAGATTA
GCGATTACA
ACCGGATAG
CCACTTGGA
AGAGAAGTT
TAAGTTCGA
ACGGATATT
TGGCTCAGA
GAATCTGTA
ACCAAGGAC
AGTATCTGT
CACACACTA
ATTAAGTGC
AAGTAACCC
AAATCCTGT
CACATTGCA
GCACTGTCA
ATACTTAGG
GCAATCCGA
ACGCAATCA
GAGTATTAG
GACGGATTA
CAGCTGACA
CAACATATT
AACTTCTCC
CTATGAAAT
ATTATTACC
TACCGAGCA
TCTCTTCAA
TAAGCGTTA
GCCTTACAA
AGCACACAG
ACAGTTCCG
AGTAAAGCC
CAGTTTCAC
CGTTACTAA
TTGTTCCAA
AGAAGCACT
CAGCAAGAT
CAAACCGCC
CTAACTCGC
AATATTGGG
AGAACTTCC
CAAAGGCAC
AAGCTCAAC
TCCAGTCGA
AGCCATCAC
AACGAGAAG
CTACAGAAC
AGAGCTATG
GAGGATGGA
TGTACCTTA
ACACACAAA
TCAGGAGGA
GAGGTGCTA
ACCCTGACC
ACAAGGATC
ATCCCGGAG
TATGTGGCA
GCTGCCAAT
ATCAGAGCT
TCGAAGTGA
ATAGACGAG
AGCCCAATC
CAGAATCGT
ATCTCCACA
ACGAAAGGT
TAGCTTGTA
ACACGAGAT
AACCGCCTC
ATTTAGATG
CAAGCAAGC
CAAAGTGTG
GGCAAGCAA
GAGCCAATA
ATGTAATGG
CCTGAGCAA
GAGTACATT
TGCGATCTA
ATCACGTTA
 No newline at end of file
+97 −0
Original line number Diff line number Diff line
TACAGGATA
CACCAGGTA
TGTGAAGAA
GATTCATCA
CACCCAAAG
CACAAAGGC
GTGTGTCGA
CTAGGTCCT
ACAGTGGTA
TCGTTAGCA
AGCGACACC
AAGCTACTT
TGTTCTCCA
ACGCGAAGC
CAGAAATCG
ACCAAAATG
AGTGTTGTC
TAGGGATAC
AGGGCTGGT
TCATCCTAA
AATCCTGAA
ATCCTAGGA
ACGACCACC
TTCCATTGA
TAGTCTTGA
ACTGTTAGA
ATTCATCGT
ACTTCGAGC
TTGCGTACA
CAGTGCCCG
GACACTTAA
AGGAGGCGC
GCCTGTTCA
GTACATCTA
AATCAGTTT
ACGATGAAT
TGACAGACA
ATTAGGCAT
GGAGTCTAA
TAGAACACA
AAATAAATA
CCGACAAGA
CACCTACCC
AAGAGTAGA
TCATTGAGA
GACCTTAGA
CAAGACCTA
GGAATGATA
AAACGTACC
ACTATCCTC
CCGTATCTA
ACACATGTC
TTGGTATGA
GTGCAGTAA
AGGATTCAA
AGAATGGAG
CTCTCTCAA
GCTAACTCA
ATCAACCGA
ATGAGTTAC
ACTTGATGA
ACTTTAACT
TTGGAGGTA
GCCAATGTA
ATCCAACCG
GATGAACTG
CCATGCACA
TAGTGACTA
AAACTGCGC
ATTACCAAG
CACTCGAGA
AACTCATTG
CTTGCTTCA
ACCTGAGTC
AGGTTCGCT
AAGGACTAT
CGTTCGGTA
AGATAGTTC
CAATTGATC
GCATGGCTA
ACCAGGTGT
AGCTGCCGT
TATAGCCCT
AGAGGACCA
ACAATATGG
CAGCACTTC
CACTTATGT
AGTGAAAGG
AACCCTCGG
AGGCAGCTA
AACCAAAGT
GAGTGCGAA
CGCTAAGCA
AATTATAAC
TACTAGTCA
CAACAACGG
CGATGTTTA
 No newline at end of file
+97 −0
Original line number Diff line number Diff line
AAGCCTTCT
ATCATTCTG
CACAAGTAT
ACACCTTAG
GAACGACAA
AGTCTGTAC
AAATTACAG
GGCTACAGA
AATGTATCG
CAAGTAGAA
GATCTCTTA
AACAACGCG
GGTGAGTTA
CAGGGAGGG
TCCGTCTTA
TGCATAGTA
ACTTACGAT
TGTATGCGA
GCTCCTTGA
GGCACAACA
CTCAAGACA
ACGCTGTTG
ATATTGTAA
AAGTTTACG
CAGCCTGGC
CTATTAGCC
CAAACGTGG
AAAGTCATT
GTCTTGGCA
GATCAGCGA
ACATTCGGC
AGTAATTAG
TGAAGCCAA
TCTACGACA
CATAACGTT
ATGGGACTC
GATAGAGGA
CTACATGCG
CAACGATCT
GTTAGCCTA
AGTTGCATC
AAGGGAACT
ACTACATAT
CTAAGCTTC
ACGAACCAG
TACTTCGGA
AACATCCAT
AGCCTGGTT
CAAGTTTCC
CAGGCATTT
ACGTGGGAG
TCTCACGGA
GCAACATTA
ATGGTCCGT
CTATCATGA
CAATACAAG
AAAGAGGCC
GTAGAAGCA
GCTATGGAA
ACTCCAGGG
ACAAGTGCA
GATGGTCCA
TCCTCAATA
AATAAACAA
CTGTACGGA
CTAGATAGA
AGCTATGTG
AAATGGAGG
AGCCGCAAG
ACAGTAAAC
AACGTGTGA
ACTGAATTC
AAGGGTCAG
TGTCTATCA
TCAGATTCA
CACGATCCG
AACAGAAAC
CATGAATGA
CGTACTACG
TTCAGCTCA
AAGGCCGCA
GGTTGGACA
CGTCTAGGT
AATTCGGCG
CAACCTCCA
CAATAGGGT
ACAGGCTCC
ACAACTAGT
AGTTGTTCT
AATTACCGG
ACAAACTTT
TCTCGGTTA
ACTAGACCG
ACTCATACG
ATCGAGTCT
CATAGGTCA
TTAGGCATA
 No newline at end of file