Commit 30588308 authored by TomKellyGenetics
Browse files

Correct bug in renaming files with extensions (dropseq tests passing)

parent 169d27bb
Loading
Loading
Loading
Loading
+67 −24
Original line number Diff line number Diff line
@@ -1019,13 +1019,13 @@ for key in ${keys[@]}; do
        fi
        
        if [[ -h $read ]]; then
            path=`readlink -f $read`
            fullpath=`readlink -f $read`
            if [[ $verbose ]]; then
                echo "***Warning: file $read not in current directory. Path to the file captured instead.***"
                echo " (file) $read"
                echo " (path) $path"
                echo " (path) $fullpath"
            fi
            read=${path}
            read=${fullpath}
        fi
        case $read in
            #check if contains lane before read
@@ -1073,15 +1073,58 @@ for key in ${keys[@]}; do
                    echo "  $read compatible with suffix"
                fi
            ;;
            *)
            *.*)
                #rename file
                if [[ $verbose ]]; then
                    echo "***Warning: file $read does not have suffix in its name. Suffix 001 is given.***"
                    echo "  renaming $read ..."
                fi
                rename -f "s/_${readkey}.*\./_${readkey}_001\./" ${read}*
                if [[  -f $(find $(dirname ${read}) -name $(basename ${read})'*.gz') ]]; then
                    rename -f "s/_${readkey}.(.*).gz/_${readkey}_001\.\$1.gz/g" ${read}*gz
#                    read=`echo $read | sed -e "s/_${readkey}/_${readkey}_001/g"`
                fi
                if [[ ${read} == *.gz ]]; then
                    rename -f "s/_${readkey}.(.*).gz/_${readkey}_001\.\$1.gz/g" ${read}
 #                   read=`echo $read | sed -e "s/_${readkey}.(.*).gz/_${readkey}_001\.\$1.gz/g"`
                fi
                if [[ -f $(find $(dirname ${read}) -name $(basename ${read})'*.fastq') ]]; then
                    rename -f "s/_${readkey}.(.*)/_${readkey}_001\.\$1/" ${read}*.fastq
#                    read=`echo $read | sed -e "s/_${readkey}(.*).fastq/_${readkey}_001.fastq/g"`
                fi
                if [[ -f $(find $(dirname ${read}) -name $(basename ${read})'*.fq') ]]; then
                    rename -f "s/_${readkey}.(.*)/_${readkey}_001\.\$1/" ${read}*.fq
 #                   read=`echo $read | sed -e "s/_${readkey}(.*).fq/_${readkey}_001.fq/g"`
                fi
                if [[ ${read} == *.fastq ]]; then
                    rename -f "s/_${readkey}(.*).fastq/_${readkey}_001\.fastq/" ${read} ${read}.gz
#                    read=`echo $read | sed -e "s/_${readkey}*.fastq/_${readkey}_001.fastq/g"`
               fi
              if [[ ${read} == *.fq ]]; then
                   rename -f "s/_${readkey}(.*).fq/_${readkey}_001\.fq/" ${read}
#                   read=`echo $read | sed -e "s/_${readkey}*.fq/_${readkey}_001\.fq/g"`
              fi
              #update file variable
              if [[ ${read} == *.gz ]] || [[ ${read} == *.fastq ]] || [[ ${read} == *.fq ]] || [[ -f ${read} ]]; then
                  #assumes read name already contains . in file extension
                  read=`echo $read | sed -e "s/_${readkey}.*\./_${readkey}_001\./g"`
              else
                  #replace everything after read key (R1, R2, I1, I2) with 001 suffix (detects file later)
                  rename -f "s/_${readkey}.*/_${readkey}_001/g" ${read}
                  read=`echo $read | sed -e "s/_${readkey}.*/_${readkey}_001/g"`
              fi
              #remove characters after read key (R1, R2, I1, I2) required as above
              if [[ ${read} != *_${readkey}_001.* ]] && [[ ${read} != *.* ]]; then
                  rename -f "s/_${readkey}_*\./_${readkey}_001\./" ${read}
                  read=`echo $read | sed -e "s/_${readkey}.*\./_${readkey}_001\./g"`
              elif [[ ${read} != *_${readkey}_001 ]] || [[ ${read} != *_${readkey}*00#1 ]]; then
                  rename -f "s/_${readkey}*_001/_${readkey}_001/" ${read#}
                  read=`echo $read | sed -e "s/_${readkey}*_001/_${readkey}_001/g"`
              fi
              if [[ ${read} == *_${readkey}_*_001.* ]]; then
                   rename -f "s/_${readkey}.*_001\./_${readkey}_001\./" ${read}
                   read=`echo $read | sed -e "s/_${readkey}*_001\./_${readkey}_001\./g"`
              fi

                list[$j]=$read
            ;;
        esac
+17 −8
Original line number Diff line number Diff line
@@ -27,29 +27,38 @@ if [[ -f test/shared/dropseq-test/*fastq.gz ]]; then
fi
# test manual setup
bash launch_universc.sh -t "nadia" --setup

# remove processed files
if [[ -f SRR1873277_S1_L001_R[12]_001.fastq ]]; then
    rm SRR1873277_S1_L001_R[12]_001.fastq
fi
if [[ -d input4cellranger_test-dropseq ]]; then
    rm -rf input4cellranger_test-dropseq
fi
# call on dropseq with files
bash launch_universc.sh --id "test-dropseq" --technology "nadia" \
 --reference "test/cellranger_reference/cellranger-tiny-ref/3.0.0" \
 --read1 "test/shared/dropseq-test/SRR1873277_Sample1_R1" \
 --read2 "test/shared/dropseq-test/SRR1873277_Sample1_R2" 
 --read2 "test/shared/dropseq-test/SRR1873277_Sample1_R2" \
 --verbose

#reset test data (files names)
if [[ -f test/shared/dropseq-test/SRR1873277_S1_L001_R[12]_001.fastq ]]; then
    gzip test/shared/dropseq-test/SRR1873277_S1_L001_R[12]_001.fastq
#reset test data (compress)
if [[ -f test/shared/dropseq-test/SRR1873277_Sample1_S1_L001_R1_001.fastq ]]; then
    gzip -f test/shared/dropseq-test/SRR1873277_Sample1_S1_L001_R1_001.fastq
fi
if [[ -f test/shared/dropseq-test/SRR1873277_Sample1_S1_L001_R2_001.fastq ]]; then
    gzip -f test/shared/dropseq-test/SRR1873277_Sample1_S1_L001_R2_001.fastq
fi
#reset test data (files names)
if [[ ! -f test/shared/dropseq-test/SRR1873277_Sample1_R1.fastq.gz ]]; then
    mv test/shared/dropseq-test/SRR1873277_S1_L001_R1_001.fastq.gz test/shared/dropseq-test/SRR1873277_Sample1_R1.fastq.gz
    mv test/shared/dropseq-test/SRR1873277_Sample1_S1_L001_R1_001.fastq.gz test/shared/dropseq-test/SRR1873277_Sample1_R1.fastq.gz
else
    rm test/shared/dropseq-test/SRR1873277_S1_L001_R1_001.fastq.gz
    rm test/shared/dropseq-test/SRR1873277_Sample1_S1_L001_R1_001.fastq.gz
fi
if [[ ! -f test/shared/dropseq-test/SRR1873277_Sample1_R2.fastq.gz ]]; then
    mv test/shared/dropseq-test/SRR1873277_S1_L001_R2_001.fastq.gz test/shared/dropseq-test/SRR1873277_Sample1_R1.fastq.gz
    mv test/shared/dropseq-test/SRR1873277_Sample1_S1_L001_R2_001.fastq.gz test/shared/dropseq-test/SRR1873277_Sample1_R2.fastq.gz
else
    rm test/shared/dropseq-test/SRR1873277_S1_L001_R2_001.fastq.gz
    rm test/shared/dropseq-test/SRR1873277_Sample1_S1_L001_R2_001.fastq.gz
fi

# compress all input files