Commit 3ae9d1ce authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

generate microwell-seq barcodes and trim adapters

parent 3cd4b4f4
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
@@ -1660,6 +1660,12 @@ else
                #allow for barcodes in index (I1) and R1
                perl ${MAKEINDROPBARCODES} ${whitelistdir}/inDrop_gel_barcode1_list.txt ${whitelistdir}/inDrop_gel_barcode2_list.txt v3 ${whitelistdir}
            fi
        elif [[ "$technology" == "microwellseq" ]]; then
            if [[ ! -f "${whitelistdir}/microwellseq_barcode.txt" ]]; then
                 #generates all combinations of R1 barcodes (A x B x C), one concatenated barcode per line
                 #joining on a field that does not exist (9999) pairs every line of the first input with
                 #every line of the second; join separates the output fields with spaces, so the spaces
                 #must be stripped after EACH join (otherwise the whitelist lines look like " AB C")
                 join -j 9999 "${whitelistdir}/microwell-seq_barcodeA.txt" "${whitelistdir}/microwell-seq_barcodeB.txt" | sed "s/ //g" | \
                 join -j 9999 - "${whitelistdir}/microwell-seq_barcodeC.txt" | sed "s/ //g" > "${whitelistdir}/microwellseq_barcode.txt"
            fi
        elif [[ "$technology" == "sciseq2" ]]; then
             #generates all combinations of I1-I2-R1 barcodes
             if [[ ! -f ${whitelistdir}/sciseq2_barcode.txt ]]; then
@@ -2464,6 +2470,22 @@ else
        done
    fi
    
    #Microwell-Seq: remove linkers
    #helper: rewrite one FASTQ file so that reads containing the structure
    #  [barcode A: 6][CGACTCACTACAGGG][barcode B: 6][TCGGTGACACGATCG][barcode C: 6][UMI: 6]
    #keep only barcode A + barcode B + barcode C + UMI (plus any bases after the UMI);
    #everything before barcode A (phase block) and both linkers are removed.
    #The quality line of the same record is cut at exactly the same positions so that
    #sequence and quality always stay the same length.
    #  $1 - FASTQ file to rewrite in place
    #  $2 - path of the temporary file to write to before replacing $1
    #returns non-zero (leaving $1 untouched) if the rewrite fails
    _microwellseq_trim_linkers() {
        local fastq=$1
        local tmpfile=$2
        if awk '
            #start position of the LAST barcode/linker structure in s (0 if absent);
            #the last one is used to mirror a greedy ".*" prefix in a regex substitution
            function last_structure(s,    offset, start) {
                start = 0
                offset = 0
                while (match(substr(s, offset + 1), structure)) {
                    start = offset + RSTART
                    offset = start
                }
                return start
            }
            #barcode A, barcode B, then barcode C + UMI + remainder of the line
            function trimmed(s, start) {
                return substr(s, start, 6) substr(s, start + 21, 6) substr(s, start + 42)
            }
            BEGIN {
                #6 + 15 + 6 + 15 + 6 + 6 = 54 characters
                structure = "......CGACTCACTACAGGG......TCGGTGACACGATCG............"
            }
            #lines following a trimmed sequence: separator line, then the quality line
            pending > 0 {
                pending--
                if (pending == 0 && length($0) >= start + 53) {
                    print trimmed($0, start)
                } else {
                    print
                }
                next
            }
            {
                start = last_structure($0)
                if (start > 0) {
                    print trimmed($0, start)
                    pending = 2
                } else {
                    print
                }
            }' "$fastq" > "$tmpfile"; then
            #only replace the input once the rewrite has completed successfully
            mv -- "$tmpfile" "$fastq"
        else
            rm -f -- "$tmpfile"
            return 1
        fi
    }
    if [[ "$technology" == "microwellseq" ]]; then
        echo "  ...remove adapter and phase blocks for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove phase blocks and linkers
            if ! _microwellseq_trim_linkers "$convFile" "${crIN}/.temp"; then
                echo "Error: failed to remove linkers from ${convFile}; file left unchanged" >&2
            fi
        done
    fi
    
    #QuartzSeq: remove adapter
    if [[ "$technology" == "quartz-seq2-384" ]]; then
        for convFile in "${convFiles[@]}"; do
+96 −0
Original line number Diff line number Diff line
TTTAGG
ATTCCA
GCTCAA
CATCCC
TTGGAC
CTGTGT
GGACAT
CAAAGT
AAGCGG
AATAAA
GAGGAG
GGTACA
AGCGAG
GTCGGT
ATTTGC
AGGACT
GCCCTC
TCGTAA
CCAGAC
TATGTA
ACAATA
ATGCTT
AGTTTA
CACAAG
ATCAAC
TAGTCG
TAGAGA
GTCCCG
TACTTC
AAAGTT
TAAGGG
GTTGCC
AAGTAC
GATCTT
TTAACT
GCGAAT
CCGCTA
TGAAGC
ATACAG
CTTCTG
GAGATC
CCGACG
CTCCAT
AAAACG
TAGCAT
TCGGGT
GTGGTA
CCTAGA
GGGTTT
ATGGCG
TTCATA
AACGCC
GGCTGC
GCTGTG
AGATGG
GTAATG
AGGGTC
ATCTCT
GCCTAG
TCAAAG
CATGAT
TGTGCG
GCAGGA
TCTACC
AGTCGT
CGTGGC
GCGTCC
GAACGC
ACTTAT
TGGATG
TATTGT
ACGTTG
GAATTA
CCATCT
TGATCA
CGTATT
CGGCAG
GACACT
TTCCGC
CTCGCA
GTATAC
TGTCAC
TGCGGA
ACGAGC
ACACCC
CGCTTG
TGCAAT
CAACAA
CTGAAA
AACCTA
ACCTGA
TCACTT
GGGCGA
CGCACC
CGAGTA
CCTTTC
+96 −0
Original line number Diff line number Diff line
CCTAAA
TGGAAT
TTGAGC
GGGATG
GTCCAA
ACACAG
ATGTCC
ACTTTG
CCGCTT
TTTATT
CTCCTC
TGTACC
CTCGCT
ACCGAC
GCAAAT
AGTCCT
GAGGGC
TTACGA
GTCTGG
TACATA
TATTGT
AAGCAT
TAAACT
CTTGTG
GTTGAT
CGACTA
TCTCTA
CGGGAC
GAAGTA
AACTTT
CCCTTA
GGCAAC
GTACTT
AAGATC
AGTTAA
ATTCGC
TAGCGG
GCTTCA
CTGTAT
CAGAAG
GATCTC
CGTCGG
ATGGAG
CGTTTT
ATGCTA
ACCCGA
TACCAC
TCTAGG
AAACCC
CGCCAT
TATGAA
GGCGTT
GCAGCC
CACAGC
CCATCT
CATTAC
GACCCT
AGAGAT
CTAGGC
CTTTGA
ATCATG
CGCACA
TCCTGC
GGTAGA
ACGACT
GCCACG
GGACGC
GCGTTC
ATAAGT
CATCCA
ACAATA
CAACGT
TAATTC
AGATGG
TGATCA
AATACG
CTGCCG
AGTGTC
GCGGAA
TGCGAG
GTATAC
GTGACA
TCCGCA
GCTCGT
GGGTGT
CAAGCG
ATTGCA
TTGTTG
TTTCAG
TAGGTT
TCAGGT
AAGTGA
TCGCCC
GGTGCG
TACTCG
GAAAGG
+10 −0
Original line number Diff line number Diff line
CCTAAA
TGGAAT
TTGAGC
GGGATG
GTCCAA
ACACAG
ATGTCC
ACTTTG
CCGCTT
TTTATT