Commit 863c6d2e authored by kbattenb's avatar kbattenb
Browse files

convert.sh syntax now internally consistent

parent 465e29dd
Loading
Loading
Loading
Loading
+213 −248
Original line number Diff line number Diff line
@@ -8,12 +8,14 @@ if [[ -z $cellrangerpass ]]; then
    echo "cellranger command is not found."
    exit 1
fi
ver_info=`paste -d "\n" <(cellranger count --version) <(echo conversion script version 0.2.0.9001) | head -n 3 | tail -n 2`
ver_info=`paste -d "\n" <(cellranger count --version) <(echo conversion script version 0.2.0.9002) | head -n 3 | tail -n 2`
##########

#####locate script for importing barcodes######


#####locate launch_universc.sh for importing barcodes######
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
while [[ -h "$SOURCE" ]]; do #resolve $SOURCE until the file is no longer a symlink
    TARGET="$(readlink "$SOURCE")"
    if [[ $TARGET == /* ]]; then
        echo "SOURCE '$SOURCE' is an absolute symlink to '$TARGET'"
@@ -21,16 +23,18 @@ while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symli
    else
        SCRIPT_DIR="$( dirname "$SOURCE" )"
        echo "SOURCE '$SOURCE' is a relative symlink to '$TARGET' (relative to '$SCRIPT_DIR')"
    SOURCE="$SCRIPT_DIR/$TARGET" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
        SOURCE="$SCRIPT_DIR/$TARGET" #if $SOURCE is a relative symlink, we need to resolve it relative to the path where the symlink file was located
    fi
done
echo "SOURCE is '$SOURCE'"
RDIR="$( dirname "$SOURCE" )"
SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" >/dev/null 2>&1 && pwd )"
if [ "$DIR" != "$RDIR" ]; then
if [[ $RDIR != $SCRIPT_DIR ]]; then
    echo "DIR '$RDIR' resolves to '$SCRIPT_DIR'"
fi
echo "Running convertion script in '$SCRIPT_DIR'"
echo "Running launch_universc.sh in '$SCRIPT_DIR'"
##########



#####usage statement#####
help='
@@ -62,7 +66,7 @@ Mandatory arguments to long options are mandatory for short options too.
  -v,  --version                Output version information and exit
       --verbose                Print additional outputs for debugging

For each fastq file, follow the following naming convention:
For each fastq file, follow the naming convention below:
  <SampleName>_<SampleNumber>_<LaneNumber>_<ReadNumber>_001.fastq
  e.g. EXAMPLE_S1_L001_R1_001.fastq
       Example_S4_L002_R2_001.fastq.gz
@@ -266,60 +270,54 @@ done

#####check if UniverSC is running already#####
#create .lock file if none exists
if [[ ! -f  ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock ]]
    then
if [[ ! -f  ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock ]]; then
    echo "creating lock file"
    echo 0 > ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock
fi
#import lock counter

#check if jobs running (check value in .lock file)
echo "checking .lock file"
lock=`cat ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock`
#check if jobs running
if [[ ! $lock == "0" ]]
    then
    echo "$lock number of cellranger ${VERSION} jobs running in ${DIR}"

if [[ ! $lock == "0" ]]; then
    echo " $lock number of cellranger ${VERSION} jobs already running in ${DIR}"
    #check which technology is currently running
    if [[ -f ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.last_called ]]
        then
    if [[ -f ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.last_called ]]; then
        last=`cat ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.last_called`
        echo "running technology $last with $lock jobs"
        #check if currently running technology is different to convert call
        if [[ $last == $technology ]]
           then
        echo " running $lock jobs with technology $last"
        #check if the technology running is different from the current convert call
        if [[ $last == $technology ]]; then
            echo " no conflict detected"
           #add disable increment for setup calls (which aren't counted or removed)
           if [[ $setup == false ]]
               then
            #add disable increment for setup calls (which are not counted or removed)
            if [[ $setup == false ]]; then
                #add current job to lock
                echo " increment lock"
                lock=$(($lock+1))
                echo $lock > ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock
                echo " call accepted: running $lock cellranger jobs on $technology"
            fi
           echo "call accepted: running $lock cellranger calls on $technology"
	else
           echo "conflict between $technology and current $lock cellranger runs on $last"
           echo "***Please hold calls for $technology until jobs running $last are completed"
           echo "***Warning: remove ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock if $last jobs have completed or aborted"
           echo "***Error: barcode whitelist configured for currently running technology: $last" 
	    echo "Error: conflict between technology selected for the new job ($technology) and for $lock jobs currently running ($last)"
            echo "barcode whitelist configured and locked for currently running technology: $last"
            echo "remove ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock if $last jobs have completed or aborted"
            exit 1
        fi
    fi
else
    ## initialise lock file if first call (no other jobs running)
    #add disable increment for setup calls (which aren't counted or removed)
    if [[ $setup == false ]]
        then
    #initialise lock file if first call (no other jobs running)
    #add disable increment for setup calls (which are not counted or removed)
    if [[ $setup == false ]]; then
        #add current job to lock
        echo " increment lock"
        lock=$(($lock+1))
        echo "no other jobs running: $lock initiated for $technology"
        echo " call accepted: running no other cellranger jobs"
        echo $lock > ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock
    fi
fi

##########

#####check if input matches expected inputs#####
if [[ $verbose == "true" ]]
    then
if [[ $verbose == "true" ]]; then
    echo "checking options ..."
fi

@@ -346,95 +344,70 @@ fi

#check for file type (extension) for files
##allows incomplete file names and processing compressed files
for i in ${!read1[@]}
do
for i in ${!read1[@]}; do
    read=${read1[$i]}
    if [[ $verbose == "true" ]];
        then
    if [[ $verbose == "true" ]]; then
        echo " checking file format for $read1 ..."
    fi
    if [ -f $read ] && [ ! -h $read ]
        then
        if [[ $read != *"gz" ]]
            then
    if [[ -f $read ]] && [[ -h $read ]]; then
        if [[ $read == *"gz" ]]; then
            gunzip -k $read
            #update file variable
            read=`echo $read | sed -e "s/\.gz//g"`
        fi
        if [[ $read != *"fastq" ]] || [[ $read != *"fq" ]]
            then
            echo "Warning: file $read expected to be in fastq format"
        if [[ $read != *"fastq" ]] && [[ $read != *"fq" ]]; then
            echo "***Warning: file $read is assubed to be in fastq format***"
        fi
        echo $read
    elif [ -f ${read}.fq ] && [ ! -h $read ]
    then
        read=${read}.fq
        echo $read
    elif [ -f ${read}.fastq ] && [ ! -h $read ]
        then
        read=${read}.fastq
        echo $read
    elif [ -f ${read}.fq.gz ] && [ ! -h $read ]
        then
        gunzip -k ${read}.fq.gz
        read=${read}.fq
        echo $read
    elif [ -f ${read}.fastq.gz ] && [ ! -h $read ]
        then
        gunzip -k ${read}.fastq.gz
        read=${read}.fastq
        echo $read
        echo "  $read"
    elif [[ -f $read ]]; then
        if [[ $read == *"gz" ]]; then
            gunzip -k $read
            #update file variable
            read=`echo $read | sed -e "s/\.gz//g"`
        fi
        if [[ $read != *"fastq" ]] && [[ $read != *"fq" ]]; then
            echo "***Warning: file $read is assubed to be in fastq format***"
        fi
        echo "  $read"
    else
        echo $read not found
        echo "Error: $read not found"
        exit 1
    fi
    read1[$i]=$read
done

for i in ${!read2[@]}
do
for i in ${!read2[@]}; do
    read=${read2[$i]}
    if [[ $verbose == "true" ]];
        then
    if [[ $verbose == "true" ]]; then
        echo " checking file format for $read2 ..."
    fi
    if [ -f $read ] && [ ! -h $read ]
        then
        if [[ $read != *"gz" ]]
            then
    if [[ -f $read ]] && [[ -h $read ]]; then
        if [[ $read == *"gz" ]]; then
            gunzip -k $read
            #update file variable
            read=`echo $read | sed -e "s/\.gz//g"`
        fi
        if [[ $read != *"fastq" ]] || [[ $read != *"fq" ]]
            then
            echo "Warning: file $read expected to be in fastq format"
        if [[ $read != *"fastq" ]] && [[ $read != *"fq" ]]; then
            echo "***Warning: file $read is assubed to be in fastq format***"
        fi
        echo $read
    elif [ -f ${read}.fq ] && [ ! -h $read ]
    then
        read=${read}.fq
        echo $read
    elif [ -f ${read}.fastq ] && [ ! -h $read ]
        then
        read=${read}.fastq
        echo $read
    elif [ -f ${read}.fq.gz ] && [ ! -h $read ]
        then
        gunzip -k ${read}.fq.gz
        read=${read}.fq
        echo $read
    elif [ -f ${read}.fastq.gz ] && [ ! -h $read ]
        then
        gunzip -k ${read}.fastq.gz
        read=${read}.fastq
        echo $read
        echo "  $read"
    elif [[ -f $read ]]; then
        if [[ $read == *"gz" ]]; then
            gunzip -k $read
            #update file variable
            read=`echo $read | sed -e "s/\.gz//g"`
        fi
        if [[ $read != *"fastq" ]] && [[ $read != *"fq" ]]; then
            echo "***Warning: file $read is assubed to be in fastq format***"
        fi
        echo "  $read"
    else
        echo $read not found
        echo "Error: $read not found"
        exit 1
    fi
    read2[$i]=$read
done


#rename read1 and read2 files if they are not compatible with the naming convention
if [[ $verbose == "true" ]]; then
    echo " checking file name for $read1 ..."
@@ -444,7 +417,7 @@ for i in ${!read1[@]}; do
    if [[ -h $read ]]; then
        path=`readlink -f $read`
        if [[ $verbose == "true" ]]; then
            echo " ***Warning: file $read not in current directory. Path to the file captured instead***"
            echo " ***Warning: file $read not in current directory. Path to the file captured instead.***"
            echo "  (file) $read"
            echo "  (path) $path"
        fi
@@ -452,23 +425,21 @@ for i in ${!read1[@]}; do
    fi
    case $read in
        #check if contains lane before read
        *_L0[0123456789][0123456789]_R[12]*)
        *_L0[0123456789][0123456789]_R1*)
            if [[ $verbose == "true" ]]; then
                echo "  $read compatible with lane"
            fi
        ;;
        *) echo "  converting $read ..."
        *) 
            #rename file
            if [[ $verbose == "true" ]]; then
            echo "   assuming 1 lane if not given"
                echo "***Warning: file $read does not have lane value in its name. Lane 1 is assumed.***"
	        echo "  renaming $read ..."
            fi
            rename "s/_R1/_L001_R1/" $read
            #update file variable
            read=`echo $read | sed -e "s/_R1/_L001_R1/g"`
            read1[$i]=$read
        if [[ $verbose == "true" ]]; then
            echo "   renaming $read ..."
        fi
        ;;
    esac
    case $read in
@@ -479,18 +450,16 @@ for i in ${!read1[@]}; do
            fi
        ;;
        *)
            #rename file
            if [[ $verbose == "true" ]]; then
            echo "  converting $read ..."
                echo "***Warning: file $read does not have sample value in its name. Sample $j is assumed.***"
	        echo "  renaming $read ..."
            fi
        #rename file
	    j=$((${i}+1))
            rename "s/_L0/_S${j}_L0/" $read
            #update file variable
            read=`echo $read | sed -e  "s/_L0/_S${j}_L0/g"`
            read1[$i]=$read
        if [[ $verbose == "true" ]]; then
            echo "   renaming $read ..."
        fi
        ;;
    esac
    case $read in
@@ -501,20 +470,19 @@ for i in ${!read1[@]}; do
            fi
        ;;
        *)
            #rename file
            if [[ $verbose == "true" ]]; then
            echo "  converting $read ..."
                echo "***Warning: file $read does not have suffix in its name. Suffix 001 is given.***"
                echo "  renaming $read ..."
            fi
        #rename file
	    rename "s/_R1.*\./_R1_001\./" $read
            #update file variable
            read=`echo $read | sed -e  "s/_R1.*\./_R1_001\./g"`
            read1[$i]=$read
        if [[ $verbose == "true" ]]; then
            echo "   renaming $read"
        fi
        ;;
    esac
done

if [[ $verbose == "true" ]]; then
    echo " checking file name for $read2 ..."
fi
@@ -523,7 +491,7 @@ for i in ${!read2[@]}; do
    if [[ -h $read ]]; then
        path=`readlink -f $read`
        if [[ $verbose == "true" ]]; then
            echo " ***Warning: file $read not in current directory. Path to the file captured instead***"
            echo " ***Warning: file $read not in current directory. Path to the file captured instead.***"
            echo " (file) $read"
            echo " (path) $path"
        fi
@@ -531,23 +499,21 @@ for i in ${!read2[@]}; do
    fi
    case $read in
        #check if contains lane before read
        *_L0[0123456789][0123456789]_R[12]*)
        *_L0[0123456789][0123456789]_R2*)
            if [[ $verbose == "true" ]]; then
                echo "  $read compatible with lane"
            fi
        ;;
        *) echo "  converting $read ..."
        *) 
            #rename file
            if [[ $verbose == "true" ]]; then
            echo "   assuming 1 lane if not given"
                echo "***Warning: file $read does not have lane value in its name. Lane 1 is assumed.***"
	        echo "  renaming $read ..."
            fi
            rename "s/_R2/_L001_R2/" $read
            #update file variable
            read=`echo $read | sed -e "s/_R2/_L001_R2/g"`
            read2[$i]=$read
        if [[ $verbose == "true" ]]; then
            echo "   renaming $read ..."
        fi
        ;;
    esac
    case $read in
@@ -558,18 +524,16 @@ for i in ${!read2[@]}; do
            fi
        ;;
        *)
            #rename file
            if [[ $verbose == "true" ]]; then
            echo "  converting $read ..."
                echo "***Warning: file $read does not have sample value in its name. Sample $j is assumed.***"
	        echo "  renaming $read ..."
            fi
        #rename file
	    j=$((${i}+1))
            rename "s/_L0/_S${j}_L0/" $read
            #update file variable
            read=`echo $read | sed -e  "s/_L0/_S${j}_L0/g"`
            read2[$i]=$read
        if [[ $verbose == "true" ]]; then
            echo "   renaming $read ..."
        fi
        ;;
    esac
    case $read in
@@ -580,17 +544,15 @@ for i in ${!read2[@]}; do
            fi
        ;;
        *)
            #rename file
            if [[ $verbose == "true" ]]; then
            echo "  converting $read ..."
                echo "***Warning: file $read does not have suffix in its name. Suffix 001 is given.***"
                echo "  renaming $read ..."
            fi
        #rename file
	    rename "s/_R2.*\./_R2_001\./" $read
            #update file variable
            read=`echo $read | sed -e  "s/_R2.*\./_R2_001\./g"`
            read2[$i]=$read
        if [[ $verbose == "true" ]]; then
            echo "   renaming $read"
        fi
        ;;
    esac
done
@@ -653,7 +615,7 @@ elif [[ $id == *" "* ]]; then
fi

#check if reference is present
if [[ -z $reference ]] && [[ $setup == "false" ]]; then
if [[ -z $reference ]] && [[ $setup != "false" ]]; then
    echo "Error: option --reference is required"
    exit 1
fi
@@ -693,8 +655,8 @@ fi


####report inputs#####
echo "
#####Input information#####"
echo ""
echo "#####Input information#####"
echo "SETUP: $setup"
if ! [[ $setup == "false" ]]; then
    echo "***Warning: whitelist is converted for compatibility, valid barcodes cannot be detected accurately with this technology***"
@@ -1099,30 +1061,33 @@ end=`date +%s`
runtime=$((end-start))
##########



#####remove files if convert is not running elsewhere#####
echo "updating .lock file"

#reset lock counter (read in case changed by other jobs)
lock=`cat ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock`

#remove current job from counter (successfully completed)
lock=$(($lock-1))
#check if jobs running
if [[ $lock -ge 1 ]]
    then
if [[ $lock -ge 1 ]]; then
    echo "$lock number of cellranger ${VERSION} jobs still running in ${DIR}"
    #check which technology is currently running
    if [[ -f ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.last_called ]]
        then
    if [[ -f ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.last_called ]]; then
        last=`cat ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.last_called`
        echo "running technology $last with $lock jobs"
    fi
fi

#remove .lock file if no other jobs are running (prevents negative values that would allow technologies to run at the same time)
if [[ $lock -le 0 ]]
    then
if [[ $lock -le 0 ]]; then
    echo "no other jobs currently running: lock files cleared for cellranger ${VERSION} in ${DIR}"
    echo "no conflicts: whitelist can now be changed for other technologies"
    rm -f ${DIR}-cs/${VERSION}/lib/python/cellranger/barcodes/.lock
fi
##########