Commit ead79e31 authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

correct SCI-Seq barcode whitelists

parent 250e8cf4
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -1603,13 +1603,13 @@ else
        elif [[ "$technology" == "sciseq2" ]]; then
             #generates all combinations of I1-I2-R1 barcodes
             if [[ ! -f ${whitelistdir}/sciseq2_barcode.txt ]]; then
                 join -j 9999 ${whitelistdir}/sci-seq3_i5_barcodes.txt ${whitelistdir}/sci-seq3_i7_barcodes.txt | sed "s/ //g" \
                 join -j 9999 ${whitelistdir}/sci-seq3_i7_barcodes.txt ${whitelistdir}/sci-seq3_i5_barcodes.txt | sed "s/ //g" \
                 | join -j 9999 - ${whitelistdir}/sci-seq3_rt_barcodes.txt | sed "s/ //g" | awk '!a[$0]++'  > ${whitelistdir}/sciseq2_barcode.txt
             fi
        elif [[ "$technology" == "sciseq3" ]]; then
             if [[ ! -f ${whitelistdir}/sciseq3_barcode.txt ]]; then
                 #generates all combinations of I1-I2-R1 barcodes
                 join -j 9999 ${whitelistdir}/sci-seq3_i5_barcodes.txt ${whitelistdir}/sci-seq3_i7_barcodes.txt | sed "s/ //g" \
                 join -j 9999 ${whitelistdir}/sci-seq3_i7_barcodes.txt ${whitelistdir}/sci-seq3_i5_barcodes.txt | sed "s/ //g" \
                 | join -j 9999 - ${whitelistdir}/sci-seq3_hp_barcodes.txt | sed "s/ //g" | join -j 9999 - ${whitelistdir}/sci-seq3_rt_barcodes.txt | sed "s/ //g" \
                 | awk '!a[$0]++'  > ${whitelistdir}/sciseq3_barcode.txt
             fi
@@ -2449,7 +2449,7 @@ else
            mv ${crIN}/.temp $convFile
            #swap barcode and UMI
            echo "  ...barcode and UMI swapped for ${technology}"
            sed -E '2~2s/(.{10})(.{8})(.{10})/\3\1\2/' $convFile > ${crIN}/.temp
            sed -E '2~2s/(.{10})(.{8})(.{10})/\1\3\2/' $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile            

            read=$convFile
+384 −384
Original line number Diff line number Diff line
AAACCAATCT
TAAACCGGAG
TAAACCTCGT
AAACGAAGAA
AAACGTACTT
TAAACGTCAG
TAAACGTTGC
TAAACTCCGG
TAAACTGACC
AAACTGGTAT
AAAGACCATA
AAAGATGGAA
AAAGATTCGA
TAAAGCAGCG
TAAAGGCTGG
TAAAGTACGC
AAAGTCGTTA
AAAGTTAGGA
AAATACCTGT
AAATAGACGA
AAATCATGGT
AAATCGCATA
AAATGAGGTT
TAAATGCGCC
AAATTGGCTA
AACCATAAGT
TAACCATGGC
TAACCGATTG
TAACCGCTGA
TAACCTATCC
AACCTCAATA
TAACCTTGCA
AACGAATTCA
TAACGAGCCT
TAACGAGGAG
AACGATCAAT
AACGCATATA
TAACGCCTTG
TAACGGAAGT
TAACTACGCA
AACTAGAGAA
AACTATTGGA
TAACTCCGAC
AACTCTATGA
TAACTGCAGG
TAACTTGCTG
AAGAAGATCA
AAGAATAGGA
AAGACTTCAA
TAAGAGGTAC
AAGATCTTGA
TAAGCAACGT
TAAGCCATGA
TAAGCCGTTC
TAAGCCTACG
AAGCCTTATA
TAAGCGAGTC
TAAGCTCCAA
AAGCTGATTA
AAGCTTAGAA
AAGGAATCAA
AAGGAGAATA
TAAGGCTCTA
AAGGTAACTA
TAAGGTAGGA
AAGGTATAGA
TAAGGTTCCG
TAAGTAAGCC
AAGTAGCAAA
AAGTCATCTA
TAAGTCGAAG
AAGTTACCAT
AAGTTATGCT
TAAGTTCCTC
AAGTTCGATA
AATAACCAGT
AATAAGGACA
AATACTGCTT
AATAGGAGTA
AATAGGTTGT
AATAGTCCAT
AATCAGCTTT
AATCCAAGTT
TAATCCAGCA
TAATCCTTGG
TAATCGACGG
AATCGAGATA
TAATCGCTAC
AATCTCTCAA
AATGACTGAT
AATGATCGTT
TAATGCGACT
TAATGCGCGA
TAATGCTCAG
AATGGCATAT
TAATGGTACC
AATTCAACCA
AATTCCTAGA
TAATTCGGTC
TAATTGCCGT
AATTGCGAAT
ACAACTAGTT
ACAAGAACTT
ACAAGTCATA
ACAATCAAGT
ACAATGAGAA
TACAATGCTG
TACAGAACGA
ACAGACTAAT
TACAGATGAC
TACAGCGAAC
TACAGCGGTT
ACAGGATTAT
TACAGTAGAG
ACATAATGCT
TACATACGGA
ACATATCCTT
TACATCGTTC
TACATGCGAG
TACATGCTCT
TACCAACCAT
TACCAACTGA
TACCAAGGCA
ACCAATACTT
TACCAGAAGC
TACCAGGTAG
TACCATAGAC
TACCATTAGG
ACCATTCTTA
TACCGAAGGT
TACCGAGCTA
TACCGCTAAG
TACCGGCAAT
TACCGTAATC
TACCGTCTAC
ACCTAGAATT
TACCTATCAG
TACCTCGTCA
ACCTCTTATA
TACCTGATGG
TACCTGGATA
TACCTTACCG
ACCTTAGTTA
TACCTTCCTA
TACGAACGTC
TACGACCTCT
TACGAGAGAA
TACGATAGCT
ACGATGAATT
TACGCAAGTA
TACGCAATGG
TACGCGTTCA
TACGCTACGA
TACGCTCGTT
TACGCTTATC
TACGGATCCA
TACGGTCTTA
TACGTACTAG
TACGTAGACC
ACGTATTGAT
TACGTCAGTC
TACGTCGGAA
TACGTTCCGT
TACGTTGATG
TACTAAGAGC
ACTAAGCAAA
TACTACCGTA
TACTAGACGT
TACTAGTCTC
TACTATCCGG
ACTATGGTTT
ACTATTACCA
ACTCATGATA
TACTCATTCG
TACTCCGAAT
TACTCCTTAC
TACTCGAACC
TACTCGAGGA
TACTCTCCAT
TACTCTGGAG
TACTGATCTG
TACTGCGTTG
TACTGGATGC
ACTGGTATTA
TACTGGTCAA
ACTTAACCAA
TACTTACGCT
ACTTACTTCT
ACTTAGGTAA
ACTTATAGGA
TACTTCGACG
TACTTCTCGA
TACTTGCAAC
TACTTGGCAT
TAGAACCGGT
TAGAACCTCC
AGAACGATTT
AGAACTAAGA
TAGAAGCTGA
TAGAAGGACC
TAGAATGGCG
AGAATTAGCA
TAGACCGCAA
TAGACGATGG
TAGACGCGTT
TAGACGTATC
TAGACTTGCC
TAGAGAACTC
TAGAGAGTTG
TAGAGCGAGA
TAGAGCTTAC
AGAGGAATTT
TAGAGGTCCA
AGAGTAGTAT
AGAGTCAATA
TAGAGTCCTG
TAGATCAACG
AGATCCATAT
AGATGGTAAT
AGATTACGTT
AGATTATCCT
TAGCATCTCG
TAGCATGAAG
TAGCATTGGT
TAGCCAAGTC
TAGCCGAATA
TAGCCTACCA
TAGCCTCTTC
TAGCCTTCTG
TAGCGAATCG
TAGCGACTAT
TAGCGATTGC
TAGCGCAACT
TAGCGGATAA
TAGCGGTATG
AGCTAATACT
TAGCTACGGT
TAGCTCAGAG
AGCTCATTAA
TAGCTGCCAT
TAGCTGGAAC
AGCTTAATGA
TAGCTTCAGC
TAGGAACGAA
TAGGAAGACG
TAGGAAGCAT
AGGAAGTTAT
TAGGACTACC
TAGGACTCAA
TAGGAGACCA
TAGGAGATAG
TAGGAGCAAC
TAGGAGTTCT
TAGGATACTG
TAGGCATCTT
AGGCATTAAT
TAGGCCAGAA
TAGGTACCGA
TAGGTACTCT
TAGGTCCAGT
TAGGTCGATC
TAGGTCTTCG
AGGTTAAGAA
TAGGTTAGCG
TAGGTTCTGG
AGTAAGTAGA
AGTAATCCTT
AGTAATGGAA
AGTACCTAAT
TAGTAGAGAC
TAGTAGCAGG
TAGTAGCCTT
AGTAGGATAT
TAGTATCTGC
AGTATTCAGT
AGTCAACTAT
TAGTCCGATG
TAGTCGCCAA
TAGTCGTTAG
AGTCTTAACA
TAGTTACGAC
AGTTCAGTTT
AGTTCTCATA
AGTTGAATCT
TAGTTGCTTG
TAGTTGGCGA
ATAACTCCAA
ATAACTTGGA
TATAAGGCAG
ATAAGGTCTT
ATAATAGGCA
TATACCGCTC
ATACGAATGT
TATACGACGC
TATACGCCAT
ATACTCTCTA
ATACTGGATT
ATAGAATCCA
ATAGCATTGT
TATAGCCAGC
TATAGGAGCA
TATAGGTACG
ATAGTACCTA
TATAGTCGCC
ATATAAGCGT
TATATACCGC
TATATCGTCG
TATATGGCGT
ATATTGAGGT
ATATTGCCAA
TATCAACGAG
TATCAAGCGT
TATCAAGTCC
ATCAGATTCA
TATCAGGAGG
TATCAGGCCA
ATCAGTTGAA
ATCATCAGAT
ATCCAGATAT
TATCCGATCT
TATCCGCGAA
TATCCGTACC
TATCCTGGTC
TATCCTTCGA
TATCGCATCA
TATCGTCCTT
TATCGTTGCT
ATCTAACTCA
ATCTAGGAAT
TATCTCCTGA
TATCTCGAGC
TATCTCTCGG
ATCTGAAGAT
TATCTTAGCC
TATCTTCCAG
ATGAACCTTA
ATGAATGCAT
TATGACCGGA
TATGACGGTC
TATGAGACGG
TATGAGCCAA
TATGAGCTCC
ATGAGTAAGT
ATGATATCGT
TATGATGCCT
TATGATGGAG
ATGATTCTCA
TATGCAGGAC
TATGCCAACC
TATGCCAGTT
TATGCCGTAG
ATGCCTATAA
TATGCGATAC
TATGCGTCGT
ATGCTTGAAT
TATGGAACCG
TATGGAGAGA
TATGGATTGG
TATGGCGCTT
TATGGCTACT
TATGGTAGTC
ATGGTTAACT
TATTAACGCC
ATTACCATGT
TATTACGGCG
ATTAGTAGCA
ATTATGCGAA
TATTCAGCAG
ATTCATCTCT
TATTCCGCCT
ATTCCTCAAT
TATTCGACCA
ATTCGCTTAT
TATTCGGAGA
ATTCGGTATT
TATTCTCCGC
TATTGAGCGC
ATTGAGTCAA
TATTGCCGAC
TATTGCCTCG
ATTGGAGTTT
ATTGGATAGA
TATTGGTCGG
ATTGGTTCTA
ACTTGATTGT
TAATCAGCTT
TAAGAATGGT
ACAACCTATT
ATTCCTAGAT
TTGATTCCTT
AAGGATTACT
TCCTATAAGT
TATAAGAGGT
TATTCTCCTT
TAATACCAGT
TGGATTCTAT
TCTTAGTTCT
TCTATCCAAT
AATTCATCGT
TAGATGACTT
AAATTCCTCT
ATTAGGTACT
TGGTTGAATT
AACTAGTTGT
TTCTTCGTTT
AAGTTCTTGT
ACTTACTCAT
ACCATGATTT
TAACGACTTT
TTTGCTACTT
TAAGGTTCAT
AGTATTAGCT
ACAGGTATTT
TTCCATCTTT
ATTGATCGTT
TGAATCTGAT
TAGAACCAAT
TTCTCATTGT
ATATGGATCT
ATCAGTCATT
TATCTCGATT
AAACTCCAAT
TCAAGATCTT
ATAACTTCCT
AAGGATATGT
TTACCTAAGT
ATATCCTACT
ATTAGTCTGT
TGTCCTTATT
TTGACTCAAT
ATACCAGTTT
TCCTAACTTT
TGGCGTTAAT
TGGTACCATT
TTTGACCAGT
TCGAATGAGT
TCAAGCAACT
TTTCGTTCCT
TTTGGAGCTT
TTAGGAAGGT
TCAGTATCCT
TCCTAATGGT
TGAGGAACTT
TAGTCGCATT
TACGTTGCTT
TAGCGTAAGT
TTGCAAGGTT
TCTTCGACTT
TTTATCCGCT
TAGAGTACCT
TCTGAGCATT
TGCTCTTAGT
TTGCTGGATT
TACGCTTGAT
TCAGCATTGT
TATGGTTGGT
TCGTACCTAT
TCTCGTTGAT
TCATCAACGT
TCAGCAAGTT
TTCGAGAAGT
TCCAAGGATT
TAAGGACGAT
TAACTGGCAT
TGATAAGCGT
TGCGTACTTT
TGGCTAAGAT
TCTGCCTTAT
TGTCTATGGT
TGAGTTCTCT
TTACTTGCGT
TGATACGTCT
TAGATCGGAT
TTCAGCTTCT
TTCGAAGGAT
TGGTCAGTTT
TTGATGCGAT
TCTGATAGGT
TGTCTCTACT
TGTCGTAACT
AAACCATAGT
AAACTGAACT
AAAGCTGATT
AAATCGTTCT
AACCTCATTT
TAACGAGCGT
AACGATCATT
TAACGCGTCT
AACGTAATCT
AACTTGGATT
AAGACCTTAT
AAGCAGTATT
TAAGGCTACT
AAGTACGTTT
AAGTATTGGT
AATACCGAAT
AATCCAGTAT
AATTCTAGGT
ACAATGCTAT
ACATGGTAAT
ACATTCGTAT
TACCGGTTCT
ACCTCAATAT
TACGACGCAT
ACGATATCAT
ACGCTTATAT
TACGGAACGT
TACGTCTAGT
ACTGAATACT
TACTGGACCT
ACTGGTTATT
ACTTATGGTT
TACTTCCGTT
TAGAACTCCT
AGAAGTAAGT
AGAGATGAAT
TAGAGCATGT
TAGAGGTCGT
AGATTCAACT
AGATTGGTTT
AGCATAACTT
AGCATTATGT
TAGCTATCGT
AGGATAATCT
TAGGCGGAAT
TAGGCTCGTT
TAGTAGCCAT
AGTTAACCAT
ATAAGCGAAT
ATAATCCTGT
ATACTACTCT
ATAGTTGACT
ATATCTGGAT
ATATGCCATT
ATCAATACGT
ATCTGATGAT
ATCTTCAGAT
ATGCATTCAT
TATGCTTCCT
ATGGACTATT
ATGGTAACTT
ATTAATGCCT
ATTACCATCT
ATTCAAGCAT
ATTCGCAATT
TATTCGGAGT
ATTGAGGAAT
TCAACTCTCT
TCAAGGCGTT
TCAGATCAGT
TCAGGACTCT
TCATACCGCT
TCCAATCCAT
TCCAGCCTTT
TCCATCGTCT
TCCGACCAAT
TCCGTCGATT
TCCTGCAGTT
TCCTGCTACT
TCGAAGACCT
TCGAGGCAAT
TCGATTCGCT
TCGCCATTCT
TCGCTAACCT
TCGCTGCTTT
TCGGAACCTT
TCGGTAAGGT
TCGTAGGCTT
TCGTCTTCCT
TCGTTGATCT
TCTAACGACT
TCTACCTGGT
TCTCCATCAT
TCTCGCATGT
TCTCTGAGCT
TCTGACGTTT
TCTTAGCGGT
TGACCGAATT
TGACTACCAT
TGACTCGCTT
TGACTGACGT
TGAGCGGTAT
TGATTACGGT
TGCAACGTTT
TGCGCTCAAT
TGCGGAGAAT
TGCGTCGTAT
TGCTCGAGAT
TGCTTCTGGT
TGGATAGGTT
TGGCGACTAT
TGGCGCATTT
TGGTACGGAT
TGGTAGTCCT
TGGTCCTTCT
TGTACCTCTT
TGTAGCGATT
TGTCGGAGTT
TGTCGGCAAT
TGTTCCAGCT
TGTTCGCTGT
TGTTGCTCCT
TTAAGACCGT
TTAGCTCGGT
TTATCCAGGT
TTCAGCGGTT
TTCAGTTGGT
TTCCGGTCAT
TTCCGTATGT
TTCCTACCTT
TTCGCGCATT
TTCGGTACCT
TTCGTAGCGT
TTCTCCGAAT
TTCTCTCCAT
TTGAACGCGT
TTGCCTTGGT
TTGCGTAGTT
TTGCTCCTAT
TTGGACCTCT
TTGGCCTGAT
TTGGTAGGCT
TTGGTCGAAT
TTGGTCTCCT
TTTCCGACGT
TTTCTCTCGT
TTTCTGGCCT
TTTGAGTCCT
TTTGCGGTCT
TTTGGCGACT
TAACCGCTGT
TAAGATGCCT
TAAGCGCCAT
TACCAATGCT
TACCGTAGCT
TACCTTCGGT
TACGAGGTTT
TACGCCATAT
TACGGCAATT
TAGCAACGAT
TAGGCATCAT
TAGTTGCGCT
TATAGTCGCT
TATGCCAAGT
TATGGAGAGT
TATGGCAGCT
TATGGCGTAT
TATTGCCGGT
TCAACGCAGT
TCAATCGGTT
TCAGAAGGCT
TCATCGGACT
TCCAGAACCT
TCCATCAGGT
TCCATTGCGT
TCCGAGAGAT
TCCGGAGTTT
TCCGGATAGT
TCCGTCTCAT
TCCTCCTGAT
TCGACGATAT
TCGAGATGCT
TCGCCGTAAT
TCGGTTCCAT
TCGTCGAAGT
TCTACGGCAT
TCTAGTACGT
TCTATCTCCT
TCTCCAGAGT
TCTCCGGTTT
TCTCCTCGTT
TCTCTACTGT
TCTGCTGAAT
TCTGGAAGAT
TCTTCATGCT
TGAACGATGT
TGAACGGCTT
TGAAGAGGCT
TGACCAGGAT
TGACCGTCAT
TGACGTTCGT
TGACTTGGCT
TGAGACTAGT
TGATCGACCT
TGCAATCGCT
TGCAGATACT
TGCATCCAGT
TGCCATGGTT
TGCGGTATAT
TGCTGAAGCT
TGCTGGCTAT
TGGACTTGAT
TGGAGCTCAT
TGGAGGTTCT
TGGCAAGTCT
TGGCTTACTT
TGGTCTACGT
TGGTTCGAGT
TGGTTGGCAT
TGTAAGCTCT
TGTAAGGAGT
TGTAGACGGT
TGTATCGCAT
TGTCATCTGT
TGTCCTGCAT
TGTTGCAAGT
TAACTAAGGT
TTACGGTAGT
TACTCCTATT
TAGAGAGTAT
TTAGAGCCTT
TAGCCAATTT
TAGGTACTAT
TAGTTACCTT
TATAAGGCTT
TATATGCGTT
TATCATGAGT
TATCGAACTT
TATGACTTGT
TATGAGAACT
TATGCGATTT
TATGGTCTTT
TTATTCGGCT
TATTGACTCT
TATTGAGGTT
TTCAGGAGAT
TCATAGAGTT
TTCATGGCTT
TCATTAAGCT
TCCAAGTTAT
TCCAATAGTT
TCCTATTCTT
TTCCTCGAGT
TCGAATCTTT
TTCGCCAACT
TCGTCTATTT
TTCGTTCTGT
TCTACTTACT
TCTAGGAATT
TCTATACCTT
TTCTCGCTCT
TGAATTCGTT
TTGACGAGGT
TGAGAATCAT
TGAGTTAGAT
TGATCTTCTT
TGCCTATTAT
TGCTAATTCT
TGCTACTAAT
TTGGATCCGT
TGGTAATAGT
TGTTAAGACT
TTAATGAGCT
TTATAGGCAT
TTCAACTGAT
TTCCATTACT
TTCGCATAAT
TTTCGCGGAT
TTCTAAGCTT
TTCTCTAGTT
TTCTTAACCT
TTGAAGTCTT
TTGAGAGATT
TTGGAGTTAT
TTGGCAATAT
TTTGGCTCAT
TTGGTTAAGT
TTTGCTTAGT