Commit 575462f6 authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

add index I2 input and document index inputs

parent 8c0a1d6e
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -724,6 +724,8 @@ Mandatory arguments to long options are mandatory for short options too.
       --testrun                Initiates a test run with the test dataset
  -R1, --read1 FILE             Read 1 FASTQ file to pass to cellranger (cell barcodes and umi)
  -R2, --read2 FILE             Read 2 FASTQ file to pass to cellranger
  -I1, --index1 FILE            Index (I1) FASTQ file to pass to cellranger (OPTIONAL)
  -I2, --index2 FILE            Index (I2) FASTQ file to pass to cellranger (OPTIONAL and EXPERIMENTAL)
  -f,  --file NAME              Path and the name of FASTQ files to pass to cellranger (prefix before R1 or R2)
                                  e.g. /path/to/files/Example_S1_L001

+91 −45
Original line number Diff line number Diff line
@@ -136,7 +136,8 @@ Mandatory arguments to long options are mandatory for short options too.
       --testrun                Initiates a test run with the test dataset
  -R1, --read1 FILE             Read 1 FASTQ file to pass to cellranger (cell barcodes and umi)
  -R2, --read2 FILE             Read 2 FASTQ file to pass to cellranger
  -I1, --index FILE             Index FASTQ file to pass to cellranger (OPTIONAL)
  -I1, --index1 FILE            Index (I1) FASTQ file to pass to cellranger (OPTIONAL)
  -I2, --index2 FILE            Index (I2) FASTQ file to pass to cellranger (OPTIONAL and EXPERIMENTAL)
  -f,  --file NAME              Path and the name of FASTQ files to pass to cellranger (prefix before R1 or R2)
                                  e.g. /path/to/files/Example_S1_L001

@@ -213,7 +214,8 @@ convert=true
testrun=false
read1=()
read2=()
index=()
index1=()
index2=()
SAMPLE=""
LANE=()
id=""
@@ -268,22 +270,36 @@ for op in "$@"; do
                exit 1
            fi
            ;;
        -I1|--index)
        -I1|--index1|--index)
            shift
            if [[ "$1" != "" ]]; then
                arg=$1
                while [[ ! "$arg" == "-"* ]] && [[ "$arg" != "" ]]; do
                    index+=("${1/%\//}")
                    index1+=("${1/%\//}")
                    shift
                    arg=$1
                done
                next=true
            elif [[ -z $index ]]; then
                echo "Error: file input missing for --index"
            elif [[ -z $index1 ]]; then
                echo "Error: file input missing for --index1"
                exit 1
            fi
            ;;
        -f|--file)
        -I2|--index2)
            shift
            if [[ "$1" != "" ]]; then
                arg=$1
                while [[ ! "$arg" == "-"* ]] && [[ "$arg" != "" ]]; do
                    index2+=("${1/%\//}")
                    shift
                    arg=$1
                done
                next=true
            elif [[ -z $index1 ]]; then
                echo "Error: file input missing for --index1"
                exit 1
            fi
            ;;        -f|--file)
            shift
            if [[ "$1" != "" ]]; then
                arg=$1
@@ -573,7 +589,7 @@ if [[ "$technology" == "smartseq" ]]; then
    echo "***WARNING: ${technology} should only be used for kits that have UMIs***"
fi
if [[ "$technology" == "smartseq" ]] || [[ "$technology" == "indrop-v1" ]] || [[ "$technology" == "indrop-v2" ]] || [[ "$technology" == "indrop-v3" ]]; then
    echo "***WARNING: launch_universc.sh does not support dual index. Make sure that the R1 file is adjusted accordingly prior to running launch_universc.sh***"
    echo "***WARNING: launch_universc.sh does not support barcodes in dual indexes. Make sure that the R1 file is adjusted accordingly prior to running launch_universc.sh***"
fi
##########

@@ -741,30 +757,52 @@ if [[ $setup == "false" ]]; then
    fi
fi
keys=("R1" "R2")
index2=()

# check if indexes given
if [[ ${#index[@]} -eq ${#read1[@]} ]] && [[ ${#index[@]} -ge 1 ]]; then
    if [[ ${#index[@]} -eq 1 ]]; then
        echo " index $index passes"
    elif [[ ${#index[@]} -ge 2 ]]; then
        echo " indices $index passes"
#####check if indexes are given #####
##   Note that indexes are not     ##
##    supported by conversion      ##
##  You must demultiplex before    ##
##         calling cellranger      ##
##   This is a work-in-progress    ##
#####################################
if [[ ${#index1[@]} -eq ${#read1[@]} ]] && [[ ${#index1[@]} -ge 1 ]]; then
    if [[ ${#index1[@]} -eq 1 ]]; then
        echo " index1 $index1 passes"
    elif [[ ${#index1[@]} -ge 2 ]]; then
        echo " indices $index1 passes"
    else
        echo "WARNING: mismatch in number of files (check index I1 files)"
        echo "NOTE: if no index files are specified, these can be detected from R1 file names"
    fi
elif [[ ${#index2[@]} -eq ${#read1[@]} ]] && [[ ${#index2[@]} -ge 1 ]]; then
     if [[ ${#index2[@]} -eq 1 ]]; then
         echo " index2 $index2 passes"
     elif [[ ${#index2[@]} -ge 2 ]]; then
         echo " indices $index2 passes"
     else
        echo "WARNING: mismatch in number of files (check index files)"
         echo "WARNING: mismatch in number of files (check index I2 files)"
         echo "NOTE: if no index files are specified, these can be detected from R1 file names"
     fi
else
    #if number of files mismatch or no index given
    echo " checking for index files..."
    #if number of files mismatch or no index1 given
    echo " checking for index1 files..."
    for ii in $(seq 1 1 ${#read1[@]}); do
        #iterate over read1 inputs
        indexfile=${read1[$(( $ii -1 ))]}
        #derive I1 filename for R1 filename
        indexfile=$(echo $indexfile | perl -pne 's/(.*)_R1/$1_I1/' )
        #only add index files to list variable if file exists
        #only add index1 files to list variable if file exists
        if [[ -f $indexfile ]] || [[ -f ${indexfile}.gz ]] || [[ -f $indexfile.fastq ]] || [[ -f ${indexfile}.fastq.gz ]] || [[ -f $indexfile.fq ]] || [[ -f ${indexfile}.fq.gz ]]; then
            index+=("$indexfile")
            index1+=("$indexfile")
        fi
        #check for dual indexing (I2 files)
        #####check dual index(I1 and I2)#####
        ##  Note that indexes are copied   ##
        ##     but are not converted       ##
        ##   Demultiplex with bcl2fastq    ##
        ##   This is a work-in-progress    ##
        #####################################
        if [[ "$technology" == "indrop-v3" ]] || [[ "$technology" == "sci-seq" ]] || [[ "$technology" == "smartseq" ]]; then
             #iterate over read1 inputs
             indexfile=${read1[$(( $ii -1 ))]}
@@ -780,27 +818,35 @@ fi
if [[ $verbose = "true" ]]; then
    echo "${#read1[@]} read1s: ${read1[@]}"
    echo "${#read2[@]} read2s: ${read2[@]}"
    echo "${#index[@]} I1s: ${index[@]}"
    echo "${#index1[@]} I1s: ${index1[@]}"
    echo "${#index2[@]} I2s: ${index2[@]}"
fi

#check number of index files is 0 or number of read1 files
if [[ ${#index[@]} -eq ${#read1[@]} ]] || [[ ${#index[@]} -eq 0 ]]; then
    if [[ ${#index[@]} -eq ${#read1[@]} ]]; then
        echo "... accepted index file: ${index[@]}"
#check number of index1 files is 0 or number of read1 files
if [[ ${#index1[@]} -eq ${#read1[@]} ]] || [[ ${#index1[@]} -eq 0 ]]; then
    if [[ ${#index1[@]} -eq ${#read1[@]} ]]; then
        echo "... accepted index1 file: ${index1[@]}"
        keys=("R1" "R2" "I1")
    elif [[ ${#index[@]} -eq 0 ]]; then
        echo "... index files not found (optional)"
    fi
    if [[ ${#index1[@]} -eq 0 ]]; then
        echo "... index1 files not found (optional)"
    fi
else
        echo "... index files missing for some samples or lanes (will be skipped)"
        index=()
    echo "... index1 files missing for some samples or lanes (will be skipped)"
    index1=()
    fi
fi
elif [[ ${#index[@]} -eq $(( ${#read1[@]} * 2 )) ]]; then 
     echo "... accepted index file: ${index[@]}"
if [[ ${#index1[@]} -eq ${#read1[@]} ]] && [[ ${#index2[@]} -eq ${#read1[@]} ]] || [[ ${#index2[@]} -eq 0 ]]; then
    if [[ ${#index1[@]} -eq ${#read1[@]} ]] && [[ ${#index2[@]} -eq ${#read1[@]} ]]; then
        echo "... accepted index1 file: ${index1[@]}"
        keys=("R1" "R2" "I1" "I2")
    fi
    if [[ ${#index2[@]} -eq 0 ]]; then 
        echo "... index2 files not found (optional)"
    fi
else
    echo "... index files missing for some samples or lanes (will be skipped)"
    index=()
    echo "... index2 files missing for some samples or lanes (will be skipped)"
    index2=()
fi


@@ -814,7 +860,7 @@ for key in ${keys[@]}; do
    elif [[ $readkey == "R2" ]]; then
        list=("${read2[@]}")
    elif [[ $readkey == "I1" ]]; then
        list=("${index[@]}")
        list=("${index1[@]}")
     elif [[ $readkey == "I2" ]]; then
         list=("${index2[@]}")
    fi
@@ -872,7 +918,7 @@ for key in ${keys[@]}; do
    elif [[ $readkey == "R2" ]]; then
        read2=("${list[@]}")
    elif [[ $readkey == "I1" ]]; then
        index=("${list[@]}")
        index1=("${list[@]}")
     elif [[ $readkey == "I2" ]]; then
        index2=("${list[@]}")
    fi
@@ -887,7 +933,7 @@ for i in ${keys[@]}; do
    elif [[ $readkey == "R2" ]]; then
        list=("${read2[@]}")
    elif [[ $readkey == "I1" ]]; then
         list=("${index[@]}")
         list=("${index1[@]}")
    elif [[ $readkey == "I2" ]]; then
         list=("${index2[@]}")
    fi
@@ -972,7 +1018,7 @@ for i in ${keys[@]}; do
    elif [[ $readkey == "R2" ]]; then
        read2=("${list[@]}")
     elif [[ $readkey == "I1" ]]; then
        index=("${list[@]}")
        index1=("${list[@]}")
      elif [[ $readkey == "I2" ]]; then
        index2=("${list[@]}")
    fi
@@ -990,10 +1036,10 @@ fi

#checking the quality of fastq file names
read12=("${read1[@]}" "${read2[@]}")
if [[ ${#index[@]} -ge 1 ]]; then
    read12=("${read1[@]}" "${read2[@]}" "${index[@]}")
if [[ ${#index1[@]} -ge 1 ]]; then
    read12=("${read1[@]}" "${read2[@]}" "${index1[@]}")
    if [[ ${#index2[@]} -ge 1 ]]; then
        read12=("${read1[@]}" "${read2[@]}" "${index[@]}" "${index2[@]}")
        read12=("${read1[@]}" "${read2[@]}" "${index1[@]}" "${index2[@]}")
    fi
fi
for fq in "${read12[@]}"; do
@@ -1550,17 +1596,17 @@ for fq in "${read2[@]}"; do
    fi
done

if [[ ${#index[@]} -ge 1 ]]; then
if [[ ${#index1[@]} -ge 1 ]]; then
    crI1s=()
    
    if [[ $verbose == true ]]; then
         echo "Processing Index"
         echo "Fastqs: ${index[@]}"
         echo "Fastqs: ${index1[@]}"
    fi
    if [[ $verbose == true ]]; then
        echo "${index[@]}"
        echo "${index1[@]}"
    fi
    for fq in "${index[@]}"; do
    for fq in "${index1[@]}"; do
        if [[ $verbose  == true ]]; then echo "$fq" ; fi
        to=`basename $fq`
        to="${crIN}/${to}"
+91 −0
Original line number Diff line number Diff line
@@ -59,6 +59,97 @@ Provides a conversion script to run multiple technologies and custom libraries w

                 --read2 Sample_S1_L001_R2_001.fastq Sample_S1_L002_R2_001.fastq

  -I1, --index1 FILE
            Index (I1) FASTQ file to pass to cellranger (OPTIONAL). Contains the indexes 
            for each sample. (In the case of Illumina paired-ends these are the i7 indexes).
            Please provide the name of FASTQ file in the working directory or the path to it.
            String must match the name of an existing file. Files can have any of the
            following extensions:

                  .fastq .fq .fastq.gz .fq.gz

             Compressed files will be opened automatically. Files will be renamed for
             compatibility with cellranger:

                  e.g.,  SRR1873277_I1.fastq will be renamed to SRR1873277_S1_L001_I1_001.fastq

             Names for multiple files can be given, for example multiple lanes:

                 --index1 Sample_S1_L001_I1_001.fastq Sample_S1_L002_I1_001.fastq

             If index files are not given but are contained in the same directory
             as the files for the reads, they will be inferred from them.

            For example, if a file Sample_S1_L001_I1_001.fastq is in the same directory,
            it will be passed to cellranger when launch_universc.sh is called:

                bash launch_universc.sh -t "dropseq" -R1 Sample_S1_L001_R1_001.fastq -R2 Sample_S1_L001_R2_001.fastq

            It is still advisable to demultiplex samples with Illumina bcl2fastq or cellranger mkfastq
            before passing them to convert. Index files are passed to cellranger for QC. For example:

                cellranger mkfastq --run=/path/to/illumina/bcls --id=sample-name  --sample-sheet=/path/to/SampleSheet.csv --lanes=1,2

            Or

                /usr/local/bin/bcl2fastq -v --runfolder-dir "/path/to/illumina/bcls"  --output-dir "./Data/Intensities/BaseCalls"\
                                            --sample-sheet "/path/to/SampleSheet.csv" --create-fastq-for-index-reads

  -I2, --index2 FILE
            Index (I2) FASTQ file to pass to cellranger (OPTIONAL). Contains the indexes 
            for each sample. (In the case of Illumina paired-ends these are the i5 indexes).
            Please provide the name of FASTQ file in the working directory or the path to it.
            String must match the name of an existing file. Files can have any of the
            following extensions:

                  .fastq .fq .fastq.gz .fq.gz

             Compressed files will be opened automatically. Files will be renamed for
             compatibility with cellranger:

                  e.g.,  SRR1873277_I2.fastq will be renamed to SRR1873277_S1_L001_I2_001.fastq

             Names for multiple files can be given, for example multiple lanes:

                 --index2 Sample_S1_L001_I2_001.fastq Sample_S1_L002_I2_001.fastq

             If index files are not given but are contained in the same directory
             as the files for the reads, they will be inferred from them.

            For example, if a file Sample_S1_L001_I2_001.fastq is in the same directory,
            it will be passed to cellranger when launch_universc.sh is called:

                bash launch_universc.sh -t "dropseq" -R1 Sample_S1_L001_R1_001.fastq -R2 Sample_S1_L001_R2_001.fastq

            This is sufficient to pass files Sample_S1_L001_I1_001.fastq and Sample_S1_L001_I2_001.fastq
            to cellranger if they are in the same directory.

            It is still advisable to demultiplex samples with Illumina bcl2fastq or cellranger mkfastq
            before passing them to convert. Index files are passed to cellranger for QC. For example:

                cellranger mkfastq --run=/path/to/illumina/bcls --id=sample-name  --sample-sheet=/path/to/SampleSheet.csv\
                                   --lanes=1,2 --use-bases-mask y26n,I8n,I8n,Y50n

            Or

                /usr/local/bin/bcl2fastq -v --runfolder-dir "/path/to/illumina/bcls"  --output-dir "./Data/Intensities/BaseCalls"\
                                            --sample-sheet "/path/to/SampleSheet.csv" --create-fastq-for-index-reads

                /usr/local/bin/bcl2fastq  -v --runfolder-dir "/path/to/illumina/bcls"  --output-dir "./Data/Intensities/BaseCalls"\
                                             --sample-sheet "/path/to/SampleSheet.csv" --create-fastq-for-index-reads\
                                             --use-bases-mask y26n,I8n,I8n,Y50n  --mask-short-adapter-reads 0\
                                             --minimum-trimmed-read-length 0

            Note that dual indexes are not supported by cellranger. Manually demultiplexing as above into separate
            FASTQ files before processing should work as multiple samples are supported. For example, file names such as:

                Sample[ABCD]_S[1234]_L00[12]_R[12]_001.fastq

            These can be processed separately and aggregated together to include all cell barcodes.

            Note: processing dual-indexed files is not stable. If behaviour is not as you expect,
            we welcome you to contact us on GitHub to help you out.

  -f,  --file NAME
            Path and the name of FASTQ files to pass to cellranger (prefix before R1 or R2)