Commit 331f14cb authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

Merge branch 'master' of dgt-gitlab.gsc.riken.jp:tom/cellranger_convert

parents 4dd4d9d6 940399a4
Loading
Loading
Loading
Loading
+38 −11
Original line number Diff line number Diff line
@@ -82,9 +82,10 @@ Mandatory arguments to long options are mandatory for short options too.
                                  Quartz-Seq2 (14bp barcode, 8bp UMI): quartzseq2-384
                                  Quartz-Seq2 (15bp barcode, 8bp UMI): quartzseq2-1536
                                  Sci-Seq (8bp UMI, 10bp barcode): sciseq
                                  SCRB-Seq (6bp barcode, 10bp UMI): scrbseq, mcscrbseq
                                  SeqWell (12bp barcode, 8bp UMI): seqwell
                                  Smart-seq2-UMI, Smart-seq3 (11bp barcode, 8bp UMI): smartseq
                                  SCRUB-Seq (6bp barcode, 10bp UMI): scrubseq
                                  SureCell (18bp barcode, 8bp UMI): surecell, biorad
                                  SureCell (18bp barcode, 8bp UMI): surecell, ddseq, biorad
                                Custom inputs are also supported by giving the name "custom" and length of barcode and UMI separated by "_"
                                  e.g. Custom (16bp barcode, 10bp UMI): custom_16_10
  -b,  --barcodefile FILE       Custom barcode list in plain text (with each line containing a barcode)
@@ -496,16 +497,20 @@ if [[ "$technology" == "sciseq" ]] || [[ "$technology" == "sci-seq" ]]; then
    echo "Running with Sci-Seq parameters (single-cell combinatorial indexing RNA sequencing)"
    technology="sciseq"
fi
if [[ "$technology" == "scrubseq" ]] || [[ "$technology" == "scrub-seq" ]]; then
    echo "Running with SCRUB-Seq parameters"
    technology="scrubseq"
if [[ "$technology" == "scrbseq" ]] || [[ "$technology" == "scrb-seq" ]] || [[ "$technology" == "mcscrbseq" ]] || [[ "$technology" == "mcscrb-seq" ]]; then
    echo "Running with SCRB-Seq / mcSCRB-seq parameters"
    technology="scrbseq"
fi
if [[ "$technology" == "seqwell" ]] || [[ "$technology" == "seq-well" ]]; then
    echo "Running with Sci-Seq-Well parameters"
    technology="seqwell"
fi
if [[ "$technology" == "smartseq" ]] || [[ "$technology" == "smart-seq" ]] || [[ "$technology" == "smartseq2" ]] || [[ "$technology" == "smart-seq2" ]] ||  [[ "$technology" == "smartseq2-umi" ]] || [[ "$technology" == "smart-seq2-umi" ]] ||  [[ "$technology" == "smartseq3" ]] || [[ "$technology" == "smart-seq3" ]]; then
    echo "Running with Smart-Seq3 parameters (version 3 with UMIs)"
    echo "***WARNING: Smart-Seq settings should only be used for kits that have UMIs***"
    technology="smartseq"
fi
if [[ "$technology" == "surecell" ]] || [[ "$technology" == "surecellseq" ]] || [[ "$technology" == "surecell-seq" ]]|| [[ "$technology" == "bioraad" ]]; then
if [[ "$technology" == "surecell" ]] || [[ "$technology" == "surecellseq" ]] || [[ "$technology" == "surecell-seq" ]] || [[ "$technology" == "ddseq" ]] || [[ "$technology" == "dd-seq" ]] || [[ "$technology" == "bioraad" ]]; then
    echo "Running with SureCell parameters"
    technology="surecell"
fi
@@ -531,7 +536,8 @@ if [[ "$technology" != "10x" ]] \
&& [[ "$technology" != "indrop"* ]] \
&& [[ "$technology" != "quartz-seq2"* ]] \
&& [[ "$technology" != "sciseq" ]] \
&& [[ "$technology" != "scrubseq" ]] \
&& [[ "$technology" != "scrbseq" ]] \
&& [[ "$technology" != "seqwell" ]] \
&& [[ "$technology" != "smart-seq"* ]]\
&& [[ "$technology" != "surecell" ]]; then
    if [[ "$technology" != "custom"* ]]; then
@@ -793,7 +799,7 @@ else
        echo "***WARNING: barcodes not available for Smart-Seq 2 or 3, using iCELL8 whitelist (version 3)***"
        echo "...valid barcodes may be an overestimate"
        barcodefile=${SDIR}/iCell8_barcode.txt
    elif [[ "$technology" == "custom"* ]] || [[ "$technology" == "celseq"* ]] ||  [[ "$technology" == "scrubseq" ]] || [[ "$technology" == "sciseq" ]] || [[ "$technology" == "surecell" ]]; then
    elif [[ "$technology" == "custom"* ]] || [[ "$technology" == "celseq"* ]] ||  [[ "$technology" == "sciseq" ]] || [[ "$technology" == "scrbseq" ]] || [[ "$technology" == "seqwell" ]] || [[ "$technology" == "surecell" ]]; then
        if [[ "$technology" == "celseq" ]]; then
            customname="celseq"
            minlength=8
@@ -803,9 +809,12 @@ else
        elif [[ "$technology" == "sciseq" ]]; then
            customname="sciseq"
            minlength=10
        elif [[ "$technology" == "scrubseq" ]]; then
            customname="scrubseq"
        elif [[ "$technology" == "scrbseq" ]]; then
            customname="scrbseq"
            minlength=6
        elif [[ "$technology" == "seqwell" ]]; then
            customname="seqwell"
            minlength=12
        elif [[ "$technology" == "surecell" ]]; then
            customname="surecell"
            barcodelength=18
@@ -964,9 +973,12 @@ elif [[ "$technology" == "quartz-seq2-1536" ]]; then
elif [[ "$technology" == "sciseq" ]]; then
    barcodelength=10
    umilength=8
elif [[ "$technology" == "scrubseq" ]]; then
elif [[ "$technology" == "scrbseq" ]]; then
    barcodelength=6 
    umilength=10
elif [[ "$technology" == "seqwell" ]]; then
    barcodelength=8
    umilength=12
elif [[ "$technology" == "smartseq" ]]; then
    barcodelength=11
    umilength=8
@@ -1313,6 +1325,21 @@ else
        done
    fi

    #remove adapter from SCRB-Seq
    if [[ "$technology" == "scrbseq" ]];
        for convFile in "${convFiles[@]}"; do
            #remove adapters
            sed -E '
                /TCTTCCGATCT(.{6})(.{10})/ {
                s/TCTTCCGATCT(.{6})(.{10})/\1\2/g
                n
                n
                s/.{11}(.{6})(.{10})/\1\2/g
                }' $convFile > ${crIN}/.temp
            mv ${crIN}/.temp $convFile
        done
    fi

    #converting barcodes
    echo " adjusting barcodes of R1 files"
    if [[ $barcodeadjust != 0 ]]; then