Commit c79db871 authored by kai.b's avatar kai.b
Browse files

allowing custom barcode file for all technology

parent 4bbe6cb9
Loading
Loading
Loading
Loading
+4969 −4969

File changed.

Preview size limit exceeded, changes collapsed.

+141 −152
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ SDIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
RDIR="$( dirname "$SOURCE" )"

if [[ $RDIR != $SDIR ]]; then
    echo "DIR '$RDIR' resolves to '$SDIR'"
    echo "'$RDIR' resolves to '$SDIR'"
fi
echo "Running launch_universc.sh in '$SDIR'"

@@ -86,8 +86,8 @@ Mandatory arguments to long options are mandatory for short options too.
  
  -p,  --per-cell-data          Generates a file with basic run statistics along with per-cell data 
  
  -s,  --setup                  Set up whitelists for compatibility with new technology
  -a,  --as-is                  Skips the FASTQ file conversion if converted files already exist
       --setup                  Set up whitelists for compatibility with new technology and exit
       --as-is                  Skips the FASTQ file conversion if the file already exists
  
  -h,  --help                   Display this help and exit
  -v,  --version                Output version information and exit
@@ -117,7 +117,10 @@ fi
#set options
lockfile=${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/barcodes/.lock #path for .lock file
lastcallfile=${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/barcodes/.last_called #path for .last_called
lastcall=`[ -e $lastcallfile ] &&  cat $lastcallfile || echo ""`
lastcall=`[[ -e $lastcallfile ]] &&  cat $lastcallfile || echo ""`
lastcall_b=`echo ${lastcall} | cut -f1 -d' '`
lastcall_u=`echo ${lastcall} | cut -f2 -d' '`
lastcall_p=`echo ${lastcall} | cut -f3 -d' '`
barcodedir=${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/barcodes #folder within cellranger with the whitelist barcodes
barcodefile=""
crIN=input4cellranger #name of the directory with all FASTQ files given to cellranger
@@ -126,8 +129,8 @@ percellfile="outs/basic_stats.txt" #name of the file with the basic statistics o

#variable options
setup=false
testrun=false
convert=true
testrun=false
read1=()
read2=()
SAMPLE=""
@@ -136,8 +139,8 @@ id=""
description=""
reference=""
ncells=""
chemistry=""
jobmode=""
chemistry="SC3Pv2"
jobmode="local"
ncores=""
mem=""
percelldata=false
@@ -327,12 +330,12 @@ for op in "$@"; do
            next=false
            shift
            ;;
        -s|--setup)
        --setup)
            setup=true
            next=false
            shift
            ;;
        -a|--as-is)
        --as-is)
            convert=false
            next=false
            shift
@@ -369,27 +372,32 @@ fi

#check if this is a test run
if [[ $testrun == "true" ]]; then
    if [[ ]] || [[ ]]; then
    if [[ ${#read1[@]} -gt 0 ]] || [[ ${#read2[@]} -gt 0 ]]; then
        echo "Error: for test run, no R1 or R2 file can be selected."
        exit 1
    fi
    
    reference=${SDIR}/test/cellranger_reference/cellranger-tiny-ref/3.0.0
    if [[ -z $id ]]; then
    
    percelldata=true
    id=test-tiny-${technology}
    fi
    description=${id}
    
    if [[ $technology == "10x" ]]; then
        gunzip -k ${SDIR}test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L00[12]_R[12]_001.fastq.gz
        read1=("test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L001_R1_001.fastq" "test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002_R1_001.fastq")
        read2=("test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L001_R1_002.fastq" "test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002_R2_001.fastq")
        gunzip -k ${SDIR}/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L00[12]_R[12]_001.fastq.gz
        read1=("${SDIR}/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L001_R1_001.fastq" "${SDIR}/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002_R1_001.fastq")
        read2=("${SDIR}/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L001_R1_002.fastq" "${SDIR}/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002_R2_001.fastq")
    elif [[ $technology == "nadia" ]]; then
        gunzip -k test/shared/dropseq-test/SRR1873277_S1_L001_R[12]_001.fastq
        read1=("test/shared/dropseq-test/SRR1873277_S1_L001_R1_001.fastq")
        read2=("test/shared/dropseq-test/SRR1873277_S1_L001_R2_001.fastq")
        gunzip -k ${SDIR}/test/shared/dropseq-test/SRR1873277_S1_L001_R[12]_001.fastq
        read1=("${SDIR}/test/shared/dropseq-test/SRR1873277_S1_L001_R1_001.fastq")
        read2=("${SDIR}/test/shared/dropseq-test/SRR1873277_S1_L001_R2_001.fastq")
    elif [[ $technology == "icell8" ]]; then
        gunzip -k test/shared/mappa-test/test_FL_R[12].fastq.gz
        read1=("test/shared/mappa-test/test_FL_R1.fastq")
        read2=("test/shared/mappa-test/test_FL_R2.fastq")
        gunzip -k ${SDIR}/test/shared/mappa-test/test_FL_R[12].fastq.gz
        read1=("${SDIR}/test/shared/mappa-test/test_FL_R1.fastq")
        read2=("${SDIR}/test/shared/mappa-test/test_FL_R2.fastq")
    else
        echo "Error: for test run, option --technology must be 10x, nadia, or icell8"
	exit 1
    fi
fi

@@ -403,6 +411,14 @@ if ! [[ -w "$barcodedir" ]]; then
    exit 1
fi

#check if convert is writable
if ! [[ -w "$SDIR" ]]; then
    echo "Error: Trying to run launch_universc.sh installed at $SDIR"
    echo "$SDIR must be writable to run launch_universc.sh"
    echo "Install launch_universc.sh in a directory with write permissions such as /home/`whoami`/local and export to the PATH"
    exit 1
fi

#check if technology matches expected inputs
if [[ "$technology" != "10x" ]] && [[ "$technology" != "nadia" ]] && [[ "$technology" != "icell8" ]]; then
    if [[ "$technology" != "custom"* ]]; then
@@ -419,7 +435,6 @@ if [[ "$technology" != "10x" ]] && [[ "$technology" != "nadia" ]] && [[ "$techno
            echo "Error: when option -t is set as custom, a file with a list of barcodes needs to be specified with option -b."
            exit 1
        fi
	setup=true
    fi
fi

@@ -624,13 +639,26 @@ for fq in "${read12[@]}"; do
done
LANE=$(echo "${LANE[@]}" | tr ' ' '\n' | sort -u | tr '\n' ',' | sed 's/,$//')

#checking the quality of custom barcode file
#select the input barcode file
if [[ ! -z "$barcodefile" ]]; then
    if [[ ! -f $barcodefile ]]; then
        echo "Error: File selected for option --barcodefile does not exist"
	exit 1
    else
        barcodefile=`readlink -f $barcodefile`
    fi
else
    if [[ "$technology" == "10x" ]]; then
        barcodefile=default
    elif [[ "$technology" == "nadia" ]]; then
        barcodefile=${SDIR}/nadia_barcode.txt
        if [[ ! -f ${barcodefile} ]]; then
            #creat a nadia barcode file
            echo {A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G} | sed 's/ /\n/g' | sort | uniq > ${barcodefile}
        fi
    elif [[ "$technology" == "icell8" ]]; then
        barcodefile=${SDIR}/iCell8_barcode.txt
    fi
fi

#check if reference is present
@@ -649,6 +677,7 @@ elif ! [[ $ncells =~ $int ]] && [[ $setup == "false" ]]; then
    echo "Error: option --force-cells must be an integer"
    exit 1
fi

#check if ncores is an integer
int='^[0-9]+$'
if [[ -z "$ncores" ]]; then
@@ -657,6 +686,7 @@ elif ! [[ $ncores =~ $int ]] && [[ $setup == "false" ]]; then
    echo "Error: option --localcores must be an integer"
    exit 1
fi

#check if mem is a number
int='^[0-9]+([.][0-9]+)?$'
if [[ -z "$mem" ]]; then
@@ -667,34 +697,17 @@ elif ! [[ $mem =~ $int ]] && [[ $setup == "false" ]]; then
fi

#check if chemistry matches expected input
if [[ -z "$chemistry" ]]; then
    chemistry="SC3Pv2"
elif [[ "$chemistry" != "SC3Pv2" ]] && [[ "$chemistry" != "SC5P-PE" ]] && [[ "$chemistry" != "SC5P-R2" ]]; then
if [[ "$chemistry" != "SC3Pv2" ]] && [[ "$chemistry" != "SC5P-PE" ]] && [[ "$chemistry" != "SC5P-R2" ]]; then
    echo "Error: option --chemistry must be SC3Pv2, SC5P-PE , or SC5P-R2"
    exit 1
fi

#checking if jobmode matches expected input
if [[ -z "$jobmode" ]]; then
    jobmode="local"
elif [[ "$jobmode" != "local" ]] && [[ "$jobmode" != "sge" ]] && [[ "$jobmode" != "lsf" ]] && [[ "$jobmode" != *"template" ]]; then
if [[ "$jobmode" != "local" ]] && [[ "$jobmode" != "sge" ]] && [[ "$jobmode" != "lsf" ]] && [[ "$jobmode" != *"template" ]]; then
    echo "Error: option --jobmode must be local, sge, lsf, or a .template file"
    exit 1
fi

#check if setup needs to be run before analysis (potentially overriding the user input)
if [[ -z $setup ]]; then
    setup=false
fi
if [[ $lastcall != $technology ]]; then
    setup=true
fi

#check if convertion is needs to be run before analysis (potentially overriding the user input)
if [[ ! -d $crIN ]] || [[ $lastcall != $technology ]]; then
    convert=true
fi

#check if ID is present
if [[ -z $id ]]; then
    if [[ ${#read1[@]} -ne 0 ]] || [[ ${#read2[@]} -ne 0 ]]; then
@@ -703,15 +716,17 @@ if [[ -z $id ]]; then
    fi
fi
crIN=${crIN}_${id}
if [[ ! -d ${crIN} ]]; then

#checking if crIN exists
if [[ ! -d $crIN ]]; then
    convert=true
    echo "***Warning: convertion was turned on because directory $crIN was not found***"
fi
##########



#####Selecting barcode file and setting parameter for barcode/UMI adjustments#####  
#barcode and umi lengths expected by cellranger
#####Get barcode/UMI length#####  
barcode_default=16
umi_default=10
totallength=`echo $((${barcode_default}+${umi_default}))`
@@ -736,34 +751,6 @@ fi
#adjustment lengths
barcodeadjust=`echo $(($barcodelength-$barcode_default))`
umiadjust=`echo $(($umilength-$umi_default))`

#prepare a proper barcode file
if [[ "$technology" != "10x" ]] && [[ -z $barcodefile ]]; then
    if [[ "$technology" == "nadia" ]]; then
        barcodefile=${barcodedir}/nadia_barcode.txt
        if [[ ! -f ${barcodefile} ]]; then
            #creat a nadia barcode file
            echo AAAA{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G}{A,T,C,G} | sed 's/ /\n/g' | sort | uniq > ${barcodedir}/nadia_barcode.txt
        fi
    elif [[ "$technology" == "icell8" ]]; then
        barcodefile=${barcodedir}/iCell8_barcode.txt
        if [[ ! -f ${barcodefile} ]]; then
            #create an iCell8 barcode file by copying from convert repo
            cat ${SDIR}/iCell8_barcode.txt > $barcodefile
            sed -i 's/^/AAAAA/g' ${barcodefile}
	    sort -u -o ${barcodefile} ${barcodefile}
        fi
    fi
elif [[ ! -z $barcodefile ]]; then
    cat ${barcodefile} >${barcodedir}/custom_barcode.txt
    barcodefile=${barcodedir}/custom_barcode.txt
    if [[ $barcodeadjust -gt 0 ]]; then
        sed -i "s/^.{${barcodeadjust}}//" ${barcodefile} #Trim the first n characters from the beginning of the sequence and quality
    elif [[ 0 -gt $barcodeadjust ]]; then
        As=`printf '%0.sA' $(seq 1 $(($barcodeadjust * -1)))`
        sed -i "s/^/$As/" ${barcodefile} #Trim the first n characters from the beginning of the quality
    fi
fi
##########


@@ -772,40 +759,32 @@ fi
#set up .lock file
if [[ ! -f $lockfile ]]; then
    echo "creating .lock file"
    echo 0 > $lockfile
    echo 1 > $lockfile
    lock=`cat $lockfile`
else
    #check if jobs running (check value in .lock file)
    #check if jobs are running (check value in .lock file)
    echo "checking .lock file"
    lock=`cat $lockfile`
    
    if [[ $lock -le 0 ]]; then
        echo " call accepted: no other cellranger jobs running"
        lock=1
        if [[ $setup == false ]]; then 
             echo $lock > $lockfile
        fi
    else
        if [[ -f $lastcallfile ]]; then
            echo " total of $lock cellranger ${cellrangerversion} jobs already running in ${cellrangerpath} with technology $lastcall"
	    echo " total of $lock cellranger ${cellrangerversion} jobs are already running in ${cellrangerpath} with barcode length (${lastcall_b}), UMI length (${lastcall_u}), and whitelist barcodes (${lastcall_p})"
            
	    #check if a custom barcode is used for a run (which cannot be run in parallel)
            if [[ $lastcall == "custom" ]]; then
                echo "Error: cellranger is currently running with a custom barcode list"
                echo "other jobs cannot be run until the current job is complete"
                echo "remove $lockfile if $lastcall jobs have completed or aborted"
                exit 1
            elif [[ $lastcall == $technology ]]; then
            if [[ ${barcode_length} == ${lastcall_b} ]] && [[ ${umilength} == ${lastcall_u} ]] && [[ ${barcodefile} == ${lastcall_p} ]]; then
                echo " call accepted: no conflict detected with other jobs currently running"
                #add current job to lock
                lock=$(($lock+1))
                if [[ $setup == false ]]; then 
                    echo $lock > $lockfile
                fi
                setup=false
            else
                echo "Error: conflict between technology selected for the new job ($technology) and other jobs currently running ($lastcall)"
                echo "barcode whitelist configured and locked for currently running technology: $lastcall"
                echo "remove $lockfile if $lastcall jobs have completed or aborted"
                echo "Error: conflict between technology selected for the new job and other jobs currently running"
                echo "make sure that the barcode length, UMI length, and the whitelist barcodes are the same as the other jobs currently running"
                echo "if confident that no other jobs are running and still get this error, remove $lockfile and try again"
                exit 1
            fi
        else
@@ -820,16 +799,15 @@ fi
####report inputs#####
echo ""
echo "#####Input information#####"
echo "SETUP: $setup"
echo "SETUP and exit: $setup"
if [[ $setup == "true" ]]; then
    echo "***Warning: launch_universc.sh will exit once whitelist is converted***"
fi
echo "FORMAT: $technology"
if [[ $technology == "nadia" ]]; then
    echo "***Warning: whitelist is converted for compatibility with $technology, valid barcodes cannot be detected accurately with this technology***"
fi
if [[ -z $barcodefile ]]; then
    echo "BARCODES: default"
else
    echo "BARCODES: (custom barcode file) $barcodefile"
fi
echo "BARCODES: ${barcodefile}"
if [[ ${#read1[@]} -eq 0 ]] && [[ ${#read1[@]} -eq 0 ]]; then
    echo "***Warning: no FASTQ files were selected, launch_universc.sh will exit after setting up the whitelist***"
fi
@@ -867,6 +845,9 @@ if [[ "$jobmode" == "local" ]]; then
    echo "***Warning: --jobmode \"sge\" is recommended if running script with qsub***"
fi
echo "CONVERSTION: $convert"
if [[ $convert == "false" ]]; then
    echo "***Warning: adjustment for barcode and UMI length was skipped***"
fi
echo "##########"
echo ""
##########
@@ -874,8 +855,7 @@ echo ""


####setup whitelist#####
#run setup if called
if [[ $setup == "true" ]]; then
if [[ $lock -eq 1 ]]; then
    echo "setup begin"
    echo "updating barcodes in $barcodedir for cellranger version ${cellrangerversion} installed in ${cellrangerpath} ..."
    
@@ -896,38 +876,44 @@ if [[ $setup == "true" ]]; then
        echo " ${cellrangerpath} set for $technology"
    fi
    
    #whitelist file name
    v2=737K-august-2016.txt
    v3=3M-february-2018.txt

    #generate backup for the default 10x whitelist
    if [[ ! -f 737K-august-2016.txt.backup ]] || [[ ! -f 3M-february-2018.txt.backup.gz ]]; then
        echo " generating backups for default 10x whitelist"
        cp -f 737K-august-2016.txt 737K-august-2016.txt.backup
       	cp -f 3M-february-2018.txt.gz 3M-february-2018.txt.backup.gz
        cp -f ${v2} ${v2}.backup
       	cp -f ${v3}.gz ${v3}.backup.gz
        echo " backup generated"
    fi
    
    #convert whitelist to the apropriate barcode
    echo " converting whitelist"
    if [[ -z ${barcodefile} ]]; then
    if [[ ${barcodefile} == "default" ]]; then
        #for version 2
        cp 737K-august-2016.txt.backup 737K-august-2016.txt
        cp ${v2}.backup ${v2}
        #for version 3
        cp 3M-february-2018.txt.backup.gz 3M-february-2018.txt.gz
        cp ${v3}.backup.gz ${v3}.gz
    else
        #for version 2
        cat $barcodefile > 737K-august-2016.txt
	cat ${barcodefile} > ${v2}
        if [[ $barcodeadjust -gt 0 ]]; then
            sed -i "s/^.{${barcodeadjust}}//" ${v2} #Trim the first n characters from the beginning of the sequence and quality
        elif [[ 0 -gt $barcodeadjust ]]; then
            As=`printf '%0.sA' $(seq 1 $(($barcodeadjust * -1)))`
            sed -i "s/^/$As/" ${v2} #Trim the first n characters from the beginning of the quality
        fi
        #for version 3
        cat 737K-august-2016.txt > 3M-february-2018.txt
        gzip -f 3M-february-2018.txt
        rm translation/3M-february-2018.txt.gz
        ln -s 3M-february-2018.txt.gz translation/3M-february-2018.txt.gz
        cat ${v2} > ${v3}
        gzip -f ${v3}
        rm translation/${v3}.gz
        ln -s ${v3}.gz translation/${v3}.gz
    fi
    echo " whitelist converted"
    
    #change last call file
    if [[ ! -z $barcodefile ]]; then
        echo "custom" > $lastcallfile
    else
        echo $technology > $lastcallfile
    fi
    echo "${barcodelength} ${umilength} ${barcodefile}" > $lastcallfile
    
    cd - > /dev/null
    
@@ -938,11 +924,11 @@ fi


#####exiting when setup is all that is requested#####
if [[ ${#read1[@]} -eq 0 ]] && [[ ${#read2[@]} -eq 0 ]]; then
if [[ $setup == "ture" ]]; then
    lock=`cat $lockfile`
    lock=$(($lock-1))
    echo $lock > $lockfile
    echo " whitelist converted and no FASTQ files are selected. exiting launch_universc.sh"
    echo " setup complete. exiting launch_universc.sh"
    exit 0
fi
##########
@@ -999,13 +985,14 @@ done
echo "converting input files to confer cellranger format ..."
if [[ $convert == "false" ]]; then
    echo " input file format conversion skipped"
fi
else
    echo " adjustment parameters:"
    echo "  barcodes: ${barcodeadjust}bps at its head"
    echo "  UMIs: ${umiadjust}bps at its tail" 
    
    #converting barcodes
    echo " adjusting barcodes of R1 files"
if [[ $barcodeadjust != 0 ]] && [[ $convert == "true" ]]; then
    if [[ $barcodeadjust != 0 ]]; then
        if [[ $barcodeadjust -gt 0 ]]; then
            for convFile in "${convFiles[@]}"; do
                echo " handling $convFile ..."
@@ -1023,6 +1010,7 @@ if [[ $barcodeadjust != 0 ]] && [[ $convert == "true" ]]; then
            done
        fi
    fi
    
    #UMI
    echo " adjusting UMIs of R1 files"
    if [[ 0 -gt $umiadjust ]]; then 
@@ -1037,6 +1025,7 @@ if [[ 0 -gt $umiadjust ]]; then
            echo "  ${convFile} adjusted"
        done
    fi
fi
##########