Commit 8fa4c886 authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

Merge branch 'master' of github.com:TomKellyGenetics/cellranger_convert

parents f6c44589 af4d68e3
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
@@ -4,3 +4,28 @@ test/cellranger_reference/cellranger-tiny-ref/1.2.0/genes filter=lfs diff=lfs me
test/cellranger_reference/cellranger-tiny-ref/1.2.0/pickle filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/1.2.0/reference.json filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/1.2.0/star filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0 filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/fasta filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/genes filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/pickle filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/reference.json filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/exonGeTrInfo.tab filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/genomeParameters.txt filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/sjdbList.fromGTF.out.tab filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/fasta/genome.fa filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/fasta/genome.fa.fai filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/pickle/genes.pickle filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/chrNameLength.txt filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/Genome filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/SA filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/chrLength.txt filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/exonInfo.tab filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/geneInfo.tab filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/sjdbInfo.txt filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/sjdbList.out.tab filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/transcriptInfo.tab filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/genes/genes.gtf filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/SAindex filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/chrName.txt filter=lfs diff=lfs merge=lfs -text
test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/chrStart.txt filter=lfs diff=lfs merge=lfs -text
+40 −12
Original line number Diff line number Diff line
@@ -69,9 +69,9 @@ Mandatory arguments to long options are mandatory for short options too.
  -i,  --id ID                  A unique run id, used to name output folder
  -d,  --description TEXT       Sample description to embed in output files.
  -r,  --reference DIR          Path of directory containing 10x-compatible reference.
  -t,  --technology PLATFORM    Name of technology used to generate data (10x, nadia, icell8, or custom)
  -t,  --technology PLATFORM    Name of technology used to generate data (10x, chromium, nadia, dropseq, icell8, or custom)
                                e.g. custom_16_10
  -b,  --barcodefile FILE       Custom barcode list in plain text  
  -b,  --barcodefile FILE       Custom barcode list in plain text (with each line containing a barcode)
  
  -c,  --chemistry CHEM         Assay configuration, autodetection is not possible for converted files: 'SC3Pv2' (default), 'SC5P-PE', or 'SC5P-R2'
  -n,  --force-cells NUM        Force pipeline to use this number of cells, bypassing the cell detection algorithm.
@@ -138,7 +138,13 @@ id=""
description=""
reference=""
ncells=""
#pick the default chemistry from the technology selected so far
case "$technology" in
    10x|chromium)
        #10x data: default to auto detection of version 2 or 3
        chemistry="auto"
        ;;
    *)
        #every other platform falls back to the version 2 configuration
        chemistry="SC3Pv2"
        ;;
esac
jobmode="local"
ncores=""
mem=""
@@ -418,6 +424,17 @@ if ! [[ -w "$SDIR" ]]; then
    exit 1
fi


#map alias names onto the technology whose settings they share
case "$technology" in
    chromium)
        #chromium is handled exactly like 10x
        echo "Running with technology 10x (chromium)"
        technology="10x"
        ;;
    dropseq|drop-seq)
        #Drop-Seq data is handled with the Nadia parameters
        echo "Running with Nadia parameters (Drop-Seq)"
        technology="nadia"
        ;;
esac

#check if technology matches expected inputs
if [[ "$technology" != "10x" ]] && [[ "$technology" != "nadia" ]] && [[ "$technology" != "icell8" ]]; then
    if [[ "$technology" != "custom"* ]]; then
@@ -645,10 +662,12 @@ if [[ ! -z "$barcodefile" ]]; then
	exit 1
    else
        barcodefile=`readlink -f $barcodefile`
        #all barcodes upper case
        sed -i 's/.*/\U&/g' $barcodefile
    fi
else
    if [[ "$technology" == "10x" ]]; then
        barcodefile=default
        barcodefile="default"
    elif [[ "$technology" == "nadia" ]]; then
        barcodefile=${SDIR}/nadia_barcode.txt
        if [[ ! -f ${barcodefile} ]]; then
@@ -696,8 +715,8 @@ elif ! [[ $mem =~ $int ]] && [[ $setup == "false" ]]; then
fi

#check if chemistry matches expected input
#"auto" is the default assigned for 10x data, so it has to pass this check;
#it is refused for other technologies because autodetection is not possible
#for converted files (see the --chemistry entry of the usage text)
if [[ "$chemistry" == "auto" ]]; then
    if [[ "$technology" != "10x" ]]; then
        echo "Error: option --chemistry auto is only available for technology 10x"
        exit 1
    fi
elif [[ "$chemistry" != "SC3Pv3" ]] && [[ "$chemistry" != "SC3Pv2" ]] && [[ "$chemistry" != "SC5P-PE" ]] && [[ "$chemistry" != "SC5P-R2" ]]; then
    echo "Error: option --chemistry must be SC3Pv3, SC3Pv2, SC5P-PE, or SC5P-R2"
    exit 1
fi

@@ -758,7 +777,7 @@ umiadjust=`echo $(($umilength-$umi_default))`
#set up .lock file
if [[ ! -f $lockfile ]]; then
    echo "creating .lock file"
    echo 1 > $lockfile
    echo 0 > $lockfile
    lock=`cat $lockfile`
else
    #check if jobs are running (check value in .lock file)
@@ -768,22 +787,28 @@ else
    if [[ $lock -le 0 ]]; then
        echo " call accepted: no other cellranger jobs running"
        lock=1
        if [[ $setup == "false" ]]; then 
                    echo $lock > $lockfile
        fi
    else
        if [[ -f $lastcallfile ]]; then
	    echo " total of $lock cellranger ${cellrangerversion} jobs are already running in ${cellrangerpath} with barcode length (${lastcall_b}), UMI length (${lastcall_u}), and whitelist barcodes (${lastcall_p})"
            
	    #check if a custom barcode is used for a run (which cannot be run in parallel)
            if [[ ${barcode_length} == ${lastcall_b} ]] && [[ ${umilength} == ${lastcall_u} ]] && [[ ${barcodefile} == ${lastcall_p} ]]; then
            if [[ ${barcodelength} == ${lastcall_b} ]] && [[ ${umilength} == ${lastcall_u} ]] && [[ ${barcodefile} == ${lastcall_p} ]]; then
                echo " call accepted: no conflict detected with other jobs currently running"
                #add current job to lock
                lock=$(($lock+1))
                if [[ $setup == false ]]; then 
                if [[ $setup == "false" ]]; then 
                    echo $lock > $lockfile
                fi
            else
                echo "Error: conflict between technology selected for the new job and other jobs currently running"
                echo "make sure that the barcode length, UMI length, and the whitelist barcodes are the same as the other jobs currently running"
                echo "if confident that no other jobs are running and still get this error, remove $lockfile and try again"
                if [[ $verbose ]]; then
                    echo "Submitted configuration with barcode length (${barcodelength}), UMI length (${umilength}), and whitelist barcodes (${barcodefile})"
                fi
                exit 1
            fi
        else
@@ -854,7 +879,7 @@ echo ""


####setup whitelist#####
if [[ $lock -eq 1 ]]; then
if [[ $lock -eq 0 ]]; then
    echo "setup begin"
    echo "updating barcodes in $barcodedir for cellranger version ${cellrangerversion} installed in ${cellrangerpath} ..."
    
@@ -917,8 +942,10 @@ if [[ $lock -eq 1 ]]; then
    cd - > /dev/null
    
    echo "setup complete"
    if [[ $setup == "true" ]]; then
        exit 0
    fi
fi
#########


@@ -1114,9 +1141,10 @@ echo "cellranger run complete"
#####remove files if convert is not running elsewhere#####
echo "updating .lock file"

#remove current job from counter (successfully completed)
lock=`cat ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/barcodes/.lock`
lock=$(($lock-1))
echo $lock > $lockfile

#check if jobs running
if [[ $lock -ge 1 ]]; then
+15 −8
Original line number Diff line number Diff line
@@ -12,21 +12,27 @@ bash /universc/launch_universc.sh -t "10x" --setup

## test 10x data
# make sure a gzipped copy of the 3M-february-2018 whitelist exists:
# when the .gz file is missing, compress the plain-text whitelist
[[ -f /cellranger-3.0.2.9001/cellranger-cs/3.0.2.9001/lib/python/cellranger/barcodes/3M-february-2018.txt.gz ]] \
    || gzip /cellranger-3.0.2.9001/cellranger-cs/3.0.2.9001/lib/python/cellranger/barcodes/3M-february-2018.txt
# test cellranger call
cellranger testrun --id="tiny-test"
# unzip input data
gunzip /universc/test/shared/cellranger-tiny-fastq/3.0.0/*fastq
gunzip /cellranger-3.0.2.9001/cellranger-cs/3.0.2.9001/lib/python/cellranger/barcodes/3M-february-2018.txt.gz 
gunzip -fk /universc/test/shared/cellranger-tiny-fastq/3.0.0/*fastq.gz
gunzip -fk /cellranger-3.0.2.9001/cellranger-cs/3.0.2.9001/lib/python/cellranger/barcodes/3M-february-2018.txt.gz 
# test cellranger call
cellranger count --id="tiny-count" \
 --fastqs="/cellranger-3.0.2.9001/cellranger-tiny-fastq/3.0.0/" --sample="tinygex_S1" \
 --transcriptome="/cellranger-3.0.2.9001/cellranger-tiny-ref/3.0.0/" \
 --chemistry="SC3Pv2"
cellranger count --id="tiny-count-v3" \
 --fastqs="/cellranger-3.0.2.9001/cellranger-tiny-fastq/3.0.0/" --sample="tinygex" \
 --transcriptome="/cellranger-3.0.2.9001/cellranger-tiny-ref/3.0.0"

cellranger count --id="tiny-count-v2" \
 --fastqs="/cellranger-3.0.2.9001/cellranger-tiny-fastq/1.2.0/" --sample="tinygex" \
 --transcriptome="/cellranger-3.0.2.9001/cellranger-tiny-ref/1.2.0"

# call convert on 10x with multiple lanes
bash /universc/launch_universc.sh --id "test-10x" --technology "10x" \
 --reference "/universc/test/cellranger_reference/cellranger-tiny-ref/3.0.0" \
 --chemistry "SC3Pv3" \
 --file "/universc/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L001" \
 "/universc/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002"

@@ -39,7 +45,8 @@ bash /universc/launch_universc.sh -t "nadia" --setup
# run the Drop-Seq test data through the nadia settings
# --chemistry has to be a value the launcher accepts (SC3Pv3, SC3Pv2, SC5P-PE
# or SC5P-R2); "SCP-V3" is rejected by its chemistry check
# NOTE(review): SC3Pv3 is assumed to be the intended value - confirm, since the
# launcher itself defaults non-10x platforms to SC3Pv2
bash /universc/launch_universc.sh --id "test-dropseq" --technology "nadia" \
 --reference "/universc/test/cellranger_reference/cellranger-tiny-ref/3.0.0" \
 --read1 "/universc/test/shared/dropseq-test/SRR1873277_S1_L001_R1_001" \
 --read2 "/universc/test/shared/dropseq-test/SRR1873277_S1_L001_R2_001" \
 --chemistry "SC3Pv3"

## test icell8 data
# unzip input data
+0 −9
Original line number Diff line number Diff line
Row	Col	Candidate	For dispense	Sample	Barcode	State	Cells1	Cells2	Signal1	Signal2	Size1	Size2	Integ Signal1	Integ Signal2	Circularity1	Circularity2	Confidence	Confidence1	Confidence2	Comment	Dispense tip	Drop index	Global drop index
6	37	yes	yes	K562	CGTCGAGG+TCATGCTG	Good	1	0	390		59		23010		0.943399		1	1	1		4	159	430
44	45	yes	yes	K562	TCATACCA+GTTCGGTT	Good	1	0	489		63		30807		0.9645678		1	1	1		6	25	639
38	4	yes	yes	K562	TGGATCAA+GTTCAACT	Good	1	0	509		63		32067		0.9645678		1	1	1		1	5	176
38	53	yes	yes	K562	TGGATCAA+GTAGAATG	Good	1	0	799		27		21573		1		1	1	1		2	124	161
0	19	yes	yes	K562	AACCGGTT+TTAGGCGG	Good	1	0	504		58		29232		1		1	1	1		1	114	430
4	37	yes	yes	Pos_Ctrl	CATTCGGT+TCATGCTG	Good	1	0	411		46		18906		1		1	1	1		7	87	169
42	52	yes	yes	Pos_Ctrl	ATTCTACC+CTTCTTAC	Good	1	0	554		58		32132		1		1	1	1		1	98	150
8	4	yes	yes	K562	TATACGGA+GTTCAACT	Good	1	0	493		63		31059		0.9645678		1	1	1		8	5	101
 No newline at end of file
+8 −0
Original line number Diff line number Diff line
CGTCGAGGTCATGCTG
TCATACCAGTTCGGTT
TGGATCAAGTTCAACT
TGGATCAAGTAGAATG
AACCGGTTTTAGGCGG
CATTCGGTTCATGCTG
ATTCTACCCTTCTTAC
TATACGGAGTTCAACT