Commit dddf7af5 authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

update white lists for SCI-Seq (3-levels) and local test job

parent ead79e31
Loading
Loading
Loading
Loading
+25 −8
Original line number Diff line number Diff line
@@ -1603,18 +1603,24 @@ else
        elif [[ "$technology" == "sciseq2" ]]; then
             #generates all combinations of I1-I2-R1 barcodes
             if [[ ! -f ${whitelistdir}/sciseq2_barcode.txt ]]; then
                 join -j 9999 ${whitelistdir}/sci-seq3_i7_barcodes.txt ${whitelistdir}/sci-seq3_i5_barcodes.txt | sed "s/ //g" \
                 | join -j 9999 - ${whitelistdir}/sci-seq3_rt_barcodes.txt | sed "s/ //g" | awk '!a[$0]++'  > ${whitelistdir}/sciseq2_barcode.txt
                 join -j 9999 ${whitelistdir}/sci-seq3_i7_barcodes.txt ${whitelistdir}/sci-seq3_i5_barcodes.txt | sed "s/ //g" | \
                 join -j 9999 - ${whitelistdir}/sci-seq3_rt_barcodes.txt | sed "s/ //g" | awk '!a[$0]++'  > ${whitelistdir}/sciseq2_barcode.txt
             fi
        elif [[ "$technology" == "sciseq3" ]]; then
             if [[ ! -f ${whitelistdir}/sciseq3_barcode.txt ]]; then
                 #generates all combinations of i7-i5-hp-rt barcodes (the 4 whitelist files joined below)
                 join -j 9999 ${whitelistdir}/sci-seq3_i7_barcodes.txt ${whitelistdir}/sci-seq3_i5_barcodes.txt | sed "s/ //g" \
                 | join -j 9999 - ${whitelistdir}/sci-seq3_hp_barcodes.txt | sed "s/ //g" | join -j 9999 - ${whitelistdir}/sci-seq3_rt_barcodes.txt | sed "s/ //g" \
                 | awk '!a[$0]++'  > ${whitelistdir}/sciseq3_barcode.txt
                 join -j 9999 ${whitelistdir}/sci-seq3_i7_barcodes.txt ${whitelistdir}/sci-seq3_i5_barcodes.txt | sed "s/ //g" | \
                 join -j 9999 - ${whitelistdir}/sci-seq3_hp_barcodes.txt | sed "s/ //g" | join -j 9999 - ${whitelistdir}/sci-seq3_rt_barcodes.txt | sed "s/ //g" \
                 > ${whitelistdir}/sciseq3_barcode.txt
                 ## to filter unique lines: awk '!a[$0]++'  > ${whitelistdir}/sciseq3_barcode.txt
             fi
        else
            #generating permutations of ATCG of barcode length (non-standard evaluation required to run in script)
            if [[ ${barcodelength} -ge 12 ]]; then
                echo "  ... generating all permutations of A,T,C,G of length ${barcodelength}"
                echo "  WARNING: for large barcodes this could take a lot of time and memory"
                echo "  Please use a known barcode whitelist if possible"
            fi
            echo $(eval echo $(for ii in $(eval echo {1..${barcodelength}}); do echo "{A,T,C,G}"; done | tr "\n" " " | sed "s/ //g" | xargs -I {} echo {})) | sed 's/ /\n/g' | sort | uniq > ${barcodefile}
        fi
    fi
@@ -2405,6 +2411,7 @@ else
        echo "  ...remove adapter for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove adapter if detected (two-level indexing)
             ## TruSeq adapter: ACGACGCTCTTCCGATCT
            sed -E '
                /^ACGACGCTCTTCCGATCT/ {
                s/^ACGACGCTCTTCCGATCT(.{18})/\1/g
@@ -2439,9 +2446,19 @@ else
        echo "  ...remove adapter for ${technology}"
        for convFile in "${convFiles[@]}"; do
            #remove adapter if detected (and keep hairpin/tn5 barcode)
            ## TruSeq adapter: ACGACGCTCTTCCGATCT
            sed -E '
                /^ACGACGCTCTTCCGATCT/ {
                s/^ACGACGCTCTTCCGATCT//g
                n
                n
                s/^.{18}//g
                }'  $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile
            #remove linker
            sed -E '
                /^ACGACGCTCTTCCGATCT(.{10})CAGAGC/ {
                s/^ACGACGCTCTTCCGATCT(.{10})CAGAGC(.{18})/\1\2/g
                /^(.{10})CAGAGC/ {
                s/^(.{10})CAGAGC(.{18})/\1\2/g
                n
                n
                s/^(.{26})(.{10})(.{6})(.{18})/\2\4/g
+62 −0
Original line number Diff line number Diff line
#!/bin/bash

# Local test job for the SCI-Seq (3-level indexing) example data.
#
# Steps:
#   1. change to the repository root (parent of the directory holding this script)
#   2. copy the STAR "SA" index files for the cellranger tiny references
#      from the cellranger install if they are missing from the test directory
#   3. rename example FASTQ files and clear a stale output directory
#   4. build a reduced barcode whitelist for testing (if not already present)
#   5. run launch_universc.sh twice: once with the reduced whitelist, once without
#   6. rename and compress the FASTQ files afterwards
#
# Requires: cellranger, rsync, gzip and a Perl-style `rename` (accepts "s/old/new/").

# run tests in universc directory (parent of test directory)
# abort if the directory change fails: later steps use relative paths (including rm -rf)
cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 1
pwd
##git pull --ff-only origin $(git branch --show-current) 

# used to export to PATH for testing on SGE server
export PATH=${HOME}/local/bin/cellranger-3.0.2:$PATH

# version string is taken from between the parentheses on the 2nd line of the version output
cellrangerversion=$(cellranger count --version | head -n 2 | tail -n 1 | cut -f2 -d'(' | cut -f1 -d')')
cellrangerpath=$(command -v cellranger)

# set up cellranger reference
# (skipped entirely when cellranger is not on PATH, as there is nothing to copy from)
if [[ -n "${cellrangerpath}" ]]; then
    cellrangerdir=$(dirname "${cellrangerpath}")
    for refversion in 3.0.0 1.2.0; do
        suffixarray="cellranger-tiny-ref/${refversion}/star/SA"
        if [[ ! -f "test/cellranger_reference/${suffixarray}" ]] && [[ -f "${cellrangerdir}/${suffixarray}" ]]; then
            rsync "${cellrangerdir}/${suffixarray}" "test/cellranger_reference/${suffixarray}"
        fi
    done
fi

##unpigz -k test/shared/sciseq-v3-test/SRR7827205*fastq.gz

# drop the sample index (_S1) from lane 1 file names for both reads
if [[ -f test/shared/sciseq-v3-test/SRR7827205_S1_L001_R1_001.fastq ]]; then
    rename "s/_S1_L001/_L001/" test/shared/sciseq-v3-test/SRR7827205_S1_L001_R1_001.fastq*
fi
if [[ -f test/shared/sciseq-v3-test/SRR7827205_S1_L001_R2_001.fastq ]]; then
    rename "s/_S1_L001/_L001/" test/shared/sciseq-v3-test/SRR7827205_S1_L001_R2_001.fastq*
fi
# NOTE(review): "-n" makes these dry runs (names are printed, no file is renamed);
# confirm whether lane 2 files are meant to be renamed like lane 1
if [[ -f test/shared/sciseq-v3-test/SRR7827205_S2_L002_R1_001.fastq ]]; then
    rename -n "s/_S2_L002/_L002/" test/shared/sciseq-v3-test/SRR7827205_S2_L002_R1_001.fastq*
fi
if [[ -f test/shared/sciseq-v3-test/SRR7827205_S2_L002_R2_001.fastq ]]; then
    rename -n "s/_S2_L002/_L002/" test/shared/sciseq-v3-test/SRR7827205_S2_L002_R2_001.fastq*
fi

# remove stale output directory: check and delete the same path
# NOTE(review): a hyphenated spelling (test-icell8-72618-KU812-2-lanes) was previously
# tested here while this path was deleted; confirm which directory name is intended
if [[ -d test-icell8-72618_KU812-2-lanes ]]; then
    rm -rf -- test-icell8-72618_KU812-2-lanes
fi

# build a reduced whitelist: generate the full sciseq3 whitelist via --setup,
# then keep only the barcodes containing the sequence below
if [[ ! -f whitelists/sciseq3_barcode_test.txt ]]; then
    bash launch_universc.sh --id "test-sciseq" --technology "sciseq3" --setup --verbose
    grep "CCGAATCCGACTCCATCGA" whitelists/sciseq3_barcode.txt > whitelists/sciseq3_barcode_test.txt
fi

# first run: reduced test whitelist passed explicitly, verbose output
bash launch_universc.sh --id "test-sciseq" --technology "sciseq" \
 --reference "test/cellranger_reference/cellranger-tiny-ref/3.0.0" \
 --read1 "test/shared/sciseq-v3-test/SRR7827205_S1_R1.fastq.gz" \
 --read2 "test/shared/sciseq-v3-test/SRR7827205_S1_R2.fastq.gz" \
 --barcodefile whitelists/sciseq3_barcode_test.txt \
 --jobmode "local" --localcores 1 --verbose

# second run: same inputs without an explicit barcode file
bash launch_universc.sh --id "test-sciseq" --technology "sciseq" \
 --reference "test/cellranger_reference/cellranger-tiny-ref/3.0.0" \
 --read1 "test/shared/sciseq-v3-test/SRR7827205_S1_R1.fastq.gz" \
 --read2 "test/shared/sciseq-v3-test/SRR7827205_S1_R2.fastq.gz" \
 --jobmode "local" --localcores 1 

# strip the lane (_L001) and chunk (_001) fields from the S1 file names
if [[ -f test/shared/sciseq-v3-test/SRR7827205_S1_L001_R1_001.fastq.gz ]]; then
    rename "s/_S1_L001/_S1/" test/shared/sciseq-v3-test/SRR7827205_S1_L001_[IR][12]_001.fastq*
    rename "s/_001//" test/shared/sciseq-v3-test/SRR7827205_S1_[IR][12]_001.fastq*
fi

# compress any uncompressed S1 files; the -f check skips the literal pattern
# that the shell passes through when the glob matches nothing
for fastqfile in test/shared/sciseq-v3-test/SRR7827205_S1_[IR][12].fastq; do
    if [[ -f "${fastqfile}" ]]; then
        gzip "${fastqfile}"
    fi
done