Commit f818ab85 authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

Merge branch 'kai' of dgt-gitlab.gsc.riken.jp:tom/cellranger_convert

parents cf49e094 7d36b435
Loading
Loading
Loading
Loading
+42 −17
Original line number Diff line number Diff line
@@ -12,7 +12,9 @@ use warnings;
use List::Util qw( min max sum);

#####Options#####
my $indir = $ARGV[0];
my $barcodefile=$ARGV[0];
my $adjust_length=$ARGV[1];
my $indir = $ARGV[2];
##########


@@ -38,6 +40,29 @@ elsif (! -e $feature_file || ! -e $raw_barcode_file || ! -e $filtered_barcode_fi


#####MAIN#####
#make a hash of original and adjusted barcodes
my %adjusted2original;
open (ORIGINAL, "<$barcodefile") or die "cannot open $barcodefile.\n";
while (my $line = <ORIGINAL>) {
    my $original = $line;
    $original =~ s/\r//sig;
    $original =~ s/\n//sig;
    
    my $adjusted = $original;
    if ($adjust_length >= 0) {
        my $length = $adjust_length;
        $adjusted = substr ($length, 0, $adjusted);
    }
    else {
        my $length = abs ($adjust_length);
        my $As = "A" x $length;
        $adjusted = $As.$adjusted;
    }
    
    $adjusted2original{$adjusted} = $original;
}
close (ORIGINAL);

#gene count
print " getting gene count.\n";

@@ -110,10 +135,10 @@ foreach my $barcode (@assigned_reads_percell) {
    $count =~ s/ $//;
    my @count = split (/ /, $count);
    
    if (exists $percell{$count[1]}) {
    if (exists $percell{ $adjusted2original{$count[1]} }) {
        my %count;
        $count{ASSIGNED} = $count[0];
        $percell{$count[1]} = \%count;
        $percell{ $adjusted2original{$count[1]} } = \%count;
    }
}
foreach my $barcode (sort keys %percell) {
@@ -138,10 +163,10 @@ foreach my $barcode (@mapped_reads_percell) {
    $count =~ s/ $//;
    my @count = split (/ /, $count);

    if (exists $percell{$count[1]}) {
        my %count = %{ $percell{$count[1]} };
    if (exists $percell{ $adjusted2original{$count[1]} }) {
        my %count = %{ $percell{ $adjusted2original{$count[1]} } };
        $count{MAPPED} = $count[0];
        $percell{$count[1]} = \%count;
        $percell{ $adjusted2original{$count[1]} } = \%count;
    }
}
foreach my $barcode (sort keys %percell) {

RecoverBarcodes.pl

0 → 100644
+73 −0
Original line number Diff line number Diff line
#!/usr/bin/perl

#########################################
#                                       #
#     Written by Kai Battenberg         #
#     Plant Symbiosis Research Team     #
#                                       #
#########################################

use strict;
use warnings;

#####Options#####
my $barcodefile = $ARGV[0];
my $adjust_length = $ARGV[1];
my $cellranger_dir = $ARGV[2];
##########



#####Main#####
#make a hash of original and adjusted barcodes
my %barcodes;
open (ORIGINAL, "<$barcodefile") or die "cannot open $barcodefile.\n";
while (my $line = <ORIGINAL>) {
    my $original = $line;
    $original =~ s/\r//sig;
    $original =~ s/\n//sig;
    
    my $adjusted = $original;
    if ($adjust_length >= 0) {
        my $length = $adjust_length;
        $adjusted = substr ($length, 0, $adjusted);
    }
    else {
        my $length = abs ($adjust_length);
        my $As = "A" x $length;
        $adjusted = $As.$adjusted;
    }
    
    $barcodes{$adjusted} = $original;
}
close (ORIGINAL);

#get modified barcode list (raw)
my $raw_file = $cellranger_dir."/outs/raw_feature_bc_matrix/barcodes.tsv.gz";
my $modified_barcodes_raw = `zcat $raw_file | cut -f1 -d'-'`;
$modified_barcodes_raw =~ s/\n$//;
my @modified_barcodes_raw = split (/\n/, $modified_barcodes_raw);

#get modified barcode list (filtered)
my $filtered_file = $cellranger_dir."/outs/filtered_feature_bc_matrix/barcodes.tsv.gz";
my $modified_barcodes_filtered = `zcat $filtered_file | cut -f1 -d'-'`;
$modified_barcodes_filtered =~ s/\n$//;
my @modified_barcodes_filtered = split (/\n/, $modified_barcodes_filtered);

#replace barcodes
my $temp = "barcodes.txt";
open (TEMP, ">$temp") or die "cannot open $temp.\n";
foreach my $barcode (@modified_barcodes_raw) {
    print TEMP "$barcodes{$barcode}-1\n";
}
close (TEMP);
system "gzip -c $temp > $raw_file";

open (TEMP, ">$temp") or die "cannot open $temp.\n";
foreach my $barcode (@modified_barcodes_filtered) {
    print TEMP "$barcodes{$barcode}-1\n";
}
close (TEMP);
system "gzip -c $temp > $filtered_file";
unlink ($temp);
##########
+18 −5
Original line number Diff line number Diff line
@@ -3,11 +3,12 @@
install=false

######convert version#####
convertversion="0.3.0.90004"
convertversion="0.3.0.90008"
##########


####cellrenger version#####

#####cellrenger version#####
cellrangerpath=`which cellranger` #location of cellranger
if [[ -z $cellrangerpath ]]; then
    echo "cellranger command is not found."
@@ -42,6 +43,7 @@ if [[ $RDIR != $SDIR ]]; then
fi
echo "Running launch_universc.sh in '$SDIR'"

BARCODERECOVER=${SDIR}/RecoverBarcodes.pl
PERCELLSTATS=${SDIR}/ExtractBasicStats.pl
##########

@@ -67,6 +69,7 @@ Mandatory arguments to long options are mandatory for short options too.
  -R2, --read2 FILE             Read 2 FASTQ file to pass to cellranger
  -f,  --file NAME              Path and the name of FASTQ files to pass to cellranger (prefix before R1 or R2)
                                e.g. /path/to/files/Example_S1_L001
  
  -i,  --id ID                  A unique run id, used to name output folder
  -d,  --description TEXT       Sample description to embed in output files.
  -r,  --reference DIR          Path of directory containing 10x-compatible reference.
@@ -1130,10 +1133,20 @@ fi



#####Readjusting the barcodes in the cellranger output back to its original state##### 
if [[ ${barcodefile} != "default" ]]; then
    echo "replacing modified barcodes with the original in the output gene barcode matrix"
    perl ${BARCODERECOVER} ${barcodefile} ${barcodeadjust} ${id}
    echo "barcodes recovered"
fi
##########



#####extracting per cell data#####
if [[ $percelldata == true ]]; then
    echo "generating basic run statistics and per cell data"
    perl ${PERCELLSTATS} ${id}
    perl ${PERCELLSTATS} ${barcodefile} ${barcodeadjust} ${id}
    echo "per cell data generated"
fi
##########