Commit c38e32ea authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

Merge branch 'dev' of github.com:kbattenb/universc into dev

parents 9dd7a4c0 ebc55103
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -59,3 +59,8 @@ test/shared/smartseq3-test/Smartseq3_diySpike_R1.fastq.gz filter=lfs diff=lfs me
test/shared/smartseq3-test/Smartseq3_diySpike_R2.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/sciseq-v3-test/SRR7827205_S1_R1.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/sciseq-v3-test/SRR7827205_S1_R2.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/icell8-test/72618_KU812_L002_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/cellranger-tiny-fastq/1.2.0/read-RA_si-TTTCATGA_lane-008-chunk-001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/icell8-test/72618_KU812_L001_R1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/icell8-test/72618_KU812_L001_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/icell8-test/72618_KU812_L002_R1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
+70 −60
Original line number Diff line number Diff line
@@ -1003,13 +1003,14 @@ if [[ $setup == "false" ]]; then
fi

if [[ $verbose ]]; then
    echo "  ${#read1[@]} read1s: ${read1[@]}"
    echo "  ${#read2[@]} read2s: ${read2[@]}"
    echo " Input files prost-curation 1"
    echo "  ${#read1[@]}files - read1s: ${read1[@]}"
    echo "  ${#read2[@]}files - read2s: ${read2[@]}"
    if [[ ${#index1[@]} -gt 0 ]]; then
        echo "  ${#index1[@]} I1s: ${index1[@]}"
        echo "  ${#index1[@]}files - I1s: ${index1[@]}"
    fi
    if [[ ${#index2[@]} -gt 0 ]]; then
        echo "  ${#index2[@]} I2s: ${index2[@]}"
        echo "  ${#index2[@]}files - I2s: ${index2[@]}"
    fi
    echo "  number of these files are as expected"
fi
@@ -1109,13 +1110,14 @@ for key in ${keys[@]}; do
done

if [[ $verbose ]]; then
    echo "  ${#read1[@]} read1s: ${read1[@]}"
    echo "  ${#read2[@]} read2s: ${read2[@]}"
    echo " Input files post-curation 2"
    echo "  ${#read1[@]}files - read1s: ${read1[@]}"
    echo "  ${#read2[@]}files - read2s: ${read2[@]}"
    if [[ ${#index1[@]} -gt 0 ]]; then
        echo "  ${#index1[@]} I1s: ${index1[@]}"
        echo "  ${#index1[@]}files - I1s: ${index1[@]}"
    fi
    if [[ ${#index2[@]} -gt 0 ]]; then
        echo "  ${#index2[@]} I2s: ${index2[@]}"
        echo "  ${#index2[@]}files -  I2s: ${index2[@]}"
    fi
    echo "  files exist, with extentions compatible with launch_universc.sh"
fi
@@ -1296,13 +1298,14 @@ for key in ${keys[@]}; do
done

if [[ $verbose ]]; then
    echo "  ${#read1[@]} read1s: ${read1[@]}"
    echo "  ${#read2[@]} read2s: ${read2[@]}"
    echo " Input files post-curation 3"
    echo "  ${#read1[@]}files - read1s: ${read1[@]}"
    echo "  ${#read2[@]}files - read2s: ${read2[@]}"
    if [[ ${#index1[@]} -gt 0 ]]; then
        echo "  ${#index1[@]} I1s: ${index1[@]}"
        echo "  ${#index1[@]}files - I1s: ${index1[@]}"
    fi
    if [[ ${#index2[@]} -gt 0 ]]; then
        echo "  ${#index2[@]} I2s: ${index2[@]}"
        echo "  ${#index2[@]}files - I2s: ${index2[@]}"
    fi
    echo "  names of these files are compatible with launch_universc.sh"
fi
@@ -1383,13 +1386,14 @@ if [[ "$technology" == "indrop-v2" ]] || [[ "$technology" == "indrop-v3" ]] || [
fi

if [[ $verbose ]]; then
    echo "  ${#read1[@]} read1s: ${read1[@]}"
    echo "  ${#read2[@]} read2s: ${read2[@]}"
    echo " Input files post-curation 4"
    echo "  ${#read1[@]}files - read1s: ${read1[@]}"
    echo "  ${#read2[@]}files - read2s: ${read2[@]}"
    if [[ ${#index1[@]} -gt 0 ]]; then
        echo "  ${#index1[@]} I1s: ${index1[@]}"
        echo "  ${#index1[@]}files - I1s: ${index1[@]}"
    fi
    if [[ ${#index2[@]} -gt 0 ]]; then
        echo "  ${#index2[@]} I2s: ${index2[@]}"
        echo "  ${#index2[@]}files -  I2s: ${index2[@]}"
    fi
    echo "  input files adjusted for technology-specific conditions"
fi
@@ -1407,12 +1411,12 @@ elif [[ ${#index1[@]} -gt 0 ]]; then
fi

if [[ $verbose ]]; then
    echo "reads:" ${read12[@]}
    echo "files to be curated:" ${read12[@]}
fi

for fq in "${read12[@]}"; do
    if [[ $verbose ]]; then
        echo "read: $fq"
        echo " read file: $fq"
    fi
    name=`basename $fq`
    name=${name%.*}
@@ -1437,13 +1441,16 @@ for fq in "${read12[@]}"; do
        exit 1
    fi
    
    if [[ $verbose == true ]]; then
       echo "SAMPLE: $SAMPLE"
       echo "sample (field): $sn"
    if [[ $verbose ]]; then
        echo "  $sn(extracted from file) <- $fq"
        echo "  SAMPLE NAME: $SAMPLE"
    fi

    if [[ ${sn} != $SAMPLE ]]; then
        if [[ -z $SAMPLE ]]; then
            if [[ $verbose ]]; then
                echo "  setting SAMPLE NAME to ${sn}"
            fi
            SAMPLE=${sn}
        else
            echo "Error: some samples are labeled $SAMPLE while others are labeled $sn. cellranger can only handle files from one sample at a time."
@@ -1592,7 +1599,7 @@ fi



#####check if reference is present#####
####check if reference is present#####
if [[ -z $reference ]]; then
    if [[ $setup == "false" ]] || [[ ${#read1[@]} -ne 0 ]] || [[ ${#read2[@]} -ne 0 ]]; then
        echo "Error: option --reference is required";
@@ -1695,6 +1702,7 @@ crIN=${crIN}_${id}
##########



#####checking if crIN exists#####
if [[ ! -d $crIN ]]; then
    convert=true
@@ -1867,6 +1875,7 @@ if [[ $lock -eq 0 ]]; then
            sed -i "s/assert np.array_equal(in_mc.get_barcodes(), barcodes)/#assert np.array_equal(in_mc.get_barcodes(), barcodes)/g" ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/molecule_counter.py
        fi
    fi
    
    #backup 10x navbar
    if [[ ! -f ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/webshim/template/navbar.backup.html ]];then
        cp ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/webshim/template/navbar.html ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/webshim/template/navbar.backup.html
@@ -1880,6 +1889,7 @@ if [[ $lock -eq 0 ]]; then
        if [[ -f ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/webshim/template/navbar.backup.html ]];then
            cp ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/webshim/template/navbar.backup.html ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/webshim/template/navbar.html
        fi
        
        #restore cloupe generation
        if [[ $verbose ]]; then
            echo "restore cloupe" 
@@ -1894,6 +1904,7 @@ if [[ $lock -eq 0 ]]; then
            echo "sed -i '/out cloupe *cloupe/ {s/^#*#//g}' ${cellrangerpath}-cs/${cellrangerversion}/mro/*mro"
        fi
        sed -i '/out cloupe *cloupe/ {s/^#*#//g}' ${cellrangerpath}-cs/${cellrangerversion}/mro/*mro
        
        #restore 11 lines for cloupe preprocess call (all of the following steps must be applied together or the call will break)
        ##restore defining CLOUPE_PREPROCESS
        if [[ $verbose ]]; then
@@ -1904,8 +1915,7 @@ if [[ $lock -eq 0 ]]; then
        sed -i '/output_for_cloupe/s/^#*#//g' ${cellrangerpath}-cs/${cellrangerversion}/mro/*mro 
        ##remove calling CLOUPE_PREPROCESS
        ### iterate over all files calling CLOUPE_PREPROCESS
        for file in $(grep -l  "call CLOUPE_PREPROCESS"  ${cellrangerpath}-cs/${cellrangerversion}/mro/*.mro )
        do
        for file in $(grep -l  "call CLOUPE_PREPROCESS"  ${cellrangerpath}-cs/${cellrangerversion}/mro/*.mro ); do
            #find  start of CLOUPE_PREPROCESS call
            num=$(grep -n "call CLOUPE_PREPROCESS" $file |  head -n 1 | cut -d":" -f1)
            #find end of CLOUPE_PREPROCESS call
@@ -1921,6 +1931,7 @@ if [[ $lock -eq 0 ]]; then
            #line of HTML in header is removed
            sed '/class="logo"/d' ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/webshim/template/navbar.backup.html > ${cellrangerpath}-cs/${cellrangerversion}/lib/python/cellranger/webshim/template/navbar.html
        fi
        
        #disable cloupe generation
        if [[ $verbose ]]; then
            echo "disable cloupe"
@@ -2333,7 +2344,6 @@ else
            
            #returns a combined R1 file with I1-I2-R1 concatenated (I1 and I2 are R1 barcode)
            mv $crIN/Concatenated_File.fastq ${convR1}

        done
    fi
    
@@ -2491,7 +2501,7 @@ else
            
            echo "  ...remove internal for ${technology} by matching tag sequence for UMI reads"
            # filter UMI reads by matching the tag sequence ATTGCGCAATG (bases 1-11 of R1) and remove it as an adapter
            perl sub/FilterSmartSeqReadUMI.pl --r1=${convR1} --r2=${convR2} --i1=${convI1} --i2=${convI2} --out_dir=$crIN --tag=
            perl sub/FilterSmartSeqReadUMI.pl --r1=${convR1} --r2=${convR2} --i1=${convI1} --i2=${convI2} --tag="ATTGCGCAATG" --out_dir=$crIN
            echo "  ...trim tag sequence from R1"
            
            # returns R1 with tag sequence removed (left trim) starting with 8 bp UMI and corresponding reads for I1, I2, and R2
@@ -2502,7 +2512,7 @@ else

            echo "  ...concatencate barcodes to R1 from I1 and I2 index files"
            # concatenate barcodes from dual indexes to R1 as barcode (bases 1-16)
            perl sub/ConcatenateDualIndexBarcodes.pl --additive=${convI1} --additive=${convI2} --ref_fastq=${convR1} --tag="ATTGCGCAATG" --out_dir=$crIN
            perl sub/ConcatenateDualIndexBarcodes.pl --additive=${convI1} --additive=${convI2} --ref_fastq=${convR1} --out_dir=$crIN

            #returns a combined R1 file with I1-I2-R1 concatenated (I1 and I2 are R1 barcode)
            mv $crIN/Concatenated_File.fastq ${convR1}