Commit 7d66ebd8 authored by kai.b's avatar kai.b
Browse files

Modify the barcode list in the output gene barcode matrix to recover the original state

parent 00e036f9
Loading
Loading
Loading
Loading

BarcodeRecovery.pl

0 → 100644
+73 −0
Original line number Diff line number Diff line
#!/usr/bin/perl

#########################################
#                                       #
#     Written by Kai Battenberg         #
#     Plant Symbiosis Research Team     #
#                                       #
#########################################

use strict;
use warnings;

#####Options#####
my $barcodefile = $ARGV[0];
my $adjust_length = $ARGV[1];
my $cellranger_dir = $ARGV[2];
##########



#####Main#####
#make a hash of original and adjusted barcodes
my %barcodes;
open (ORIGINAL, "<$barcodefile") or die "cannot open $barcodefile.\n";
while (my $line = <ORIGINAL>) {
    my $original = $line;
    $original =~ s/\r//sig;
    $original =~ s/\n//sig;
    
    my $adjusted = $original;
    if ($adjust_length >= 0) {
        my $length = $adjust_length;
        $adjusted = substr ($length, 0, $adjusted);
    }
    else {
        my $length = abs ($adjust_length);
        my $As = "A" x $length;
        $adjusted = $As.$adjusted;
    }
    
    $barcodes{$adjusted} = $original;
}
close (ORIGINAL);

#get modified barcode list (raw)
my $raw_file = $cellranger_dir."/outs/raw_feature_bc_matrix/barcodes.tsv.gz";
my $modified_barcodes_raw = `zcat $raw_file | cut -f1 -d'-'`;
$modified_barcodes_raw =~ s/\n$//;
my @modified_barcodes_raw = split (/\n/, $modified_barcodes_raw);

#get modified barcode list (filtered)
my $filtered_file = $cellranger_dir."/outs/filtered_feature_bc_matrix/barcodes.tsv.gz";
my $modified_barcodes_filtered = `zcat $filtered_file | cut -f1 -d'-'`;
$modified_barcodes_filtered =~ s/\n$//;
my @modified_barcodes_filtered = split (/\n/, $modified_barcodes_filtered);

#replace barcodes
my $temp = "barcodes.txt";
open (TEMP, ">$temp") or die "cannot open $temp.\n";
foreach my $barcode (@modified_barcodes_raw) {
    print TEMP "$barcodes{$barcode}-1\n";
}
close (TEMP);
system "gzip -c $temp > $raw_file";

open (TEMP, ">$temp") or die "cannot open $temp.\n";
foreach my $barcode (@modified_barcodes_filtered) {
    print TEMP "$barcodes{$barcode}-1\n";
}
close (TEMP);
system "gzip -c $temp > $filtered_file";
unlink ($temp);
##########
+42 −17
Original line number Diff line number Diff line
@@ -12,7 +12,9 @@ use warnings;
use List::Util qw( min max sum);

#####Options#####
my $indir = $ARGV[0];
my $barcodefile=$ARGV[0];
my $adjust_length=$ARGV[1];
my $indir = $ARGV[2];
##########


@@ -38,6 +40,29 @@ elsif (! -e $feature_file || ! -e $raw_barcode_file || ! -e $filtered_barcode_fi


#####MAIN#####
#make a hash of original and adjusted barcodes
my %adjusted2original;
open (ORIGINAL, "<$barcodefile") or die "cannot open $barcodefile.\n";
while (my $line = <ORIGINAL>) {
    my $original = $line;
    $original =~ s/\r//sig;
    $original =~ s/\n//sig;
    
    my $adjusted = $original;
    if ($adjust_length >= 0) {
        my $length = $adjust_length;
        $adjusted = substr ($length, 0, $adjusted);
    }
    else {
        my $length = abs ($adjust_length);
        my $As = "A" x $length;
        $adjusted = $As.$adjusted;
    }
    
    $adjusted2original{$adjusted} = $original;
}
close (ORIGINAL);

#gene count
print " getting gene count.\n";

@@ -110,10 +135,10 @@ foreach my $barcode (@assigned_reads_percell) {
    $count =~ s/ $//;
    my @count = split (/ /, $count);
    
    if (exists $percell{$count[1]}) {
    if (exists $percell{ $adjusted2original{$count[1]} }) {
        my %count;
        $count{ASSIGNED} = $count[0];
        $percell{$count[1]} = \%count;
        $percell{ $adjusted2original{$count[1]} } = \%count;
    }
}
foreach my $barcode (sort keys %percell) {
@@ -138,10 +163,10 @@ foreach my $barcode (@mapped_reads_percell) {
    $count =~ s/ $//;
    my @count = split (/ /, $count);

    if (exists $percell{$count[1]}) {
        my %count = %{ $percell{$count[1]} };
    if (exists $percell{ $adjusted2original{$count[1]} }) {
        my %count = %{ $percell{ $adjusted2original{$count[1]} } };
        $count{MAPPED} = $count[0];
        $percell{$count[1]} = \%count;
        $percell{ $adjusted2original{$count[1]} } = \%count;
    }
}
foreach my $barcode (sort keys %percell) {
+17 −5
Original line number Diff line number Diff line
@@ -3,11 +3,12 @@
install=false

######convert version#####
convertversion="0.3.0.90005"
convertversion="0.3.0.90007"
##########


####cellrenger version#####

#####cellrenger version#####
cellrangerpath=`which cellranger` #location of cellranger
if [[ -z $cellrangerpath ]]; then
    echo "cellranger command is not found."
@@ -42,6 +43,7 @@ if [[ $RDIR != $SDIR ]]; then
fi
echo "Running launch_universc.sh in '$SDIR'"

BARCODERECOVER=${SDIR}/BarcodeRecovery.pl
PERCELLSTATS=${SDIR}/ExtractBasicStats.pl
##########

@@ -67,7 +69,7 @@ Mandatory arguments to long options are mandatory for short options too.
  -R2, --read2 FILE             Read 2 FASTQ file to pass to cellranger
  -f,  --file NAME              Path and the name of FASTQ files to pass to cellranger (prefix before R1 or R2)
                                e.g. /path/to/files/Example_S1_L001
       --trim                   run adapter and quality trimming on R2 files prior to running cellranger
  --trim                        run adapter and quality trimming on R2 files prior to running cellranger (underconstruction)
  
  -i,  --id ID                  A unique run id, used to name output folder
  -d,  --description TEXT       Sample description to embed in output files.
@@ -1137,10 +1139,20 @@ fi



#####Readjusting the barcodes in the cellranger output back to its original state##### 
if [[ ${barcodefile} != "default" ]]; then
    echo "replacing modified barcodes with the original in the output gene barcode matrix"
    perl ${BARCODERECOVER} ${barcodefile} ${barcodeadjust} ${id}
    echo "barcodes recovered"
fi
##########



#####extracting per cell data#####
if [[ $percelldata == true ]]; then
    echo "generating basic run statistics and per cell data"
    perl ${PERCELLSTATS} ${id}
    perl ${PERCELLSTATS} ${barcodefile} ${barcodeadjust} ${id}
    echo "per cell data generated"
fi
##########