Commit 4e4a0317 authored by TomKellyGenetics's avatar TomKellyGenetics
Browse files

Merge branch 'master' into dev

parents 6290977b 46cfc8d8
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ test/cellranger_reference/cellranger-tiny-ref/3.0.0/star/chrStart.txt filter=lfs
/home/tom/repos/cellranger_convert/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002_R1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
/home/tom/repos/cellranger_convert/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
/home/tom/repos/cellranger_convert/test/shared/cellranger-tiny-fastq/1.2.0/read-I1_si-TTTCATGA_lane-008-chunk-001.fastq.gz filter=lfs diff=lfs merge=lfs -text
/home/tom/repos/cellranger_convert/test/shared/cellranger-tiny-fastq/1.2.0/read-RA_si-TTTCATGA_lane-008-chunk-001.fastq.gz filter=lfs diff=lfs merge=lfs -text
/home/tom/repos/cellranger_convert/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L001_I1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
/home/tom/repos/cellranger_convert/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L001_R1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
/home/tom/repos/cellranger_convert/test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L001_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
@@ -59,6 +60,7 @@ test/shared/smartseq3-test/Smartseq3_diySpike_R2.fastq.gz filter=lfs diff=lfs me
test/shared/sciseq-v3-test/SRR7827205_S1_R1.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/sciseq-v3-test/SRR7827205_S1_R2.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/icell8-test/72618_KU812_L002_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/cellranger-tiny-fastq/1.2.0/read-RA_si-TTTCATGA_lane-008-chunk-001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/icell8-test/72618_KU812_L001_R1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/icell8-test/72618_KU812_L001_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/icell8-test/72618_KU812_L002_R1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
@@ -70,3 +72,11 @@ test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002_R1_001.fastq.gz filter=l
test/shared/cellranger-tiny-fastq/3.0.0/tinygex_S1_L002_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
whitelists/KAPA_UDI_Index2_i5.txt filter=lfs diff=lfs merge=lfs -text
whitelists/KAPA_UDI_Index1_i7.txt filter=lfs diff=lfs merge=lfs -text
test/shared/smartseq3-test/test-smartseq-hek293t_S2_L001_R1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/smartseq3-test/test-smartseq-hek293t_S2_L001_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/smartseq3-test/test-smartseq-hek293t_S2_L002_I1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/smartseq3-test/test-smartseq-hek293t_S2_L002_I2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/smartseq3-test/test-smartseq-hek293t_S2_L002_R1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/smartseq3-test/test-smartseq-hek293t_S2_L002_R2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/smartseq3-test/test-smartseq-hek293t_S2_L001_I1_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
test/shared/smartseq3-test/test-smartseq-hek293t_S2_L001_I2_001.fastq.gz filter=lfs diff=lfs merge=lfs -text
+1 −1
Original line number Diff line number Diff line
1.1.4
1.2.2
+67 −1
Original line number Diff line number Diff line
### 1.2.2

- bug fixes for detecting I1 and I2 files

- updates smartseq2 parameters for optional full-length or 5' end alignment (non-UMI read counts)

- document full-length chemistry parameters

### 1.2.1

- bug fixes for R3 and R4 file inputs

- bug fixes for compressed fastq.gz in I1 and I2

- corrected whitelist for smartseq test job

### 1.2.0

- backend to fully functional GUI app

- updates to support 10x 3' scRNA version 1

- updates test jobs and test data for SmartSeq3

- automated file detection for R3 and R4 read files

### 1.1.7

- uses reverse complement barcode whitelist for inDrops v1 and v2 (automatically generated)

- updated matching to remove adapter sequence between barcodes tested locally

- updates to support legacy versions (v1 and v2) of inDrops protocol

### 1.1.6.2

- documents GUI application

- documents pre-generated references

### 1.1.6.1

- updates large files in Docker image

### 1.1.6

- update building STAR in Docker containers

- updates to syntax in script and subroutines

- bug fixes to allow running without --verbose parameters

- minor updates to documentation for added technologies

### 1.1.5

- add subroutine to support custom references

- update Docker build parameters

- supports application with graphical user interface

### 1.1.4

version in development
- add test data for Smart-Seq3

- update handling of chemistry in set up

- correct STAR in docker container

### 1.1.3

+6 −2
Original line number Diff line number Diff line
# FROM ubuntu:bionic
FROM tomkellygenetics/cellranger_clean:latest
FROM tomkellygenetics/cellranger_clean:3.0.2.9002

RUN apt-get update \
 && apt-get upgrade -y \
@@ -20,7 +20,7 @@ RUN apt-get install -y \
RUN git clone "https://github.com/TomKellyGenetics/universc.git"

# Rebuild the bundled cellranger-tiny-ref test references: fetch the Git LFS
# files, delete the 3.0.0 and 1.2.0 reference directories, then regenerate
# each one with `cellranger mkref` from its FASTA and GTF file.
RUN cd universc/test/cellranger_reference/cellranger-tiny-ref/ \
# && git lfs pull \
 && git lfs pull \
 && rm -rf 3.0.0 1.2.0 \ 
 && cellranger mkref --genome=3.0.0 --fasta=genome-3.0.0.fa --genes=genes-3.0.0.gtf \
 && cellranger mkref --genome=1.2.0 --fasta=genome-1.2.0.fa --genes=genes-1.2.0.gtf 
@@ -80,3 +80,7 @@ RUN wget https://github.com/alexdobin/STAR/archive/2.5.1b.tar.gz \
#  && tar -xvzf BBMap_38.87.tar.gz

# ENV PATH bbmap:$PATH
# ENV PATH bbmap:$PATH

# Overwrite cellranger's check.py with a copy of its chemistry.py.
# NOTE(review): the path is pinned to cellranger-3.0.2.9001 while the base
# image tag is 3.0.2.9002 — confirm this directory exists in the image.
RUN cp /cellranger-3.0.2.9001/cellranger-cs/3.0.2.9001/lib/python/cellranger/chemistry.py /cellranger-3.0.2.9001/cellranger-cs/3.0.2.9001/lib/python/cellranger/check.py
 
+43 −17
Original line number Diff line number Diff line
@@ -34,9 +34,9 @@ tags:
![Docker Stars](https://img.shields.io/docker/stars/tomkellygenetics/universc)
![Docker Pulls](https://img.shields.io/docker/pulls/tomkellygenetics/universc)

![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/tomkellygenetics/universc/1.1.4)
![MicroBadger Layers (latest)](https://img.shields.io/microbadger/layers/tomkellygenetics/universc/latest?label="layers@1.1.4")
![Docker Image Size (v1.1.4)](https://img.shields.io/docker/image-size/tomkellygenetics/universc/1.1.4?label="image%20size@1.1.4)
![Docker Image Version (tag latest semver)](https://img.shields.io/docker/v/tomkellygenetics/universc/1.2.1)
![MicroBadger Layers (latest)](https://img.shields.io/microbadger/layers/tomkellygenetics/universc/latest?label="layers@1.2.1")
![Docker Image Size (v1.2.1)](https://img.shields.io/docker/image-size/tomkellygenetics/universc/1.2.1?label="image%20size@1.2.1")
![Docker Image Version (latest by date)](https://img.shields.io/docker/v/tomkellygenetics/universc/latest)
![MicroBadger Layers (latest)](https://img.shields.io/microbadger/layers/tomkellygenetics/universc/latest)
![Docker Image Size (latest)](https://img.shields.io/docker/image-size/tomkellygenetics/universc/latest)
@@ -61,8 +61,8 @@ tags:

![GitHub all releases](https://img.shields.io/github/downloads/minoda-lab/universc/total?label=GitHub%20downloads)
![GitHub release (latest by date)](https://img.shields.io/github/v/release/minoda-lab/universc?label=GitHub%20release)
![GitHub release (latest by date)](https://img.shields.io/github/downloads/minoda-lab/universc/1.1.4/total)
![GitHub release (by tag)](https://img.shields.io/github/downloads/minoda-lab/universc/1.1.4/total)
![GitHub release (latest by date)](https://img.shields.io/github/downloads/minoda-lab/universc/1.2.1/total)
![GitHub release (by tag)](https://img.shields.io/github/downloads/minoda-lab/universc/1.2.1/total)

![Docker CI](https://github.com/minoda-lab/universc/workflows/CI%20to%20Docker%20hub/badge.svg)
![Docker compose](https://github.com/minoda-lab/universc/workflows/Docker%20compose%20build/badge.svg)
@@ -93,7 +93,7 @@ and use this tool to process single-cell RNA-Seq data from FASTQ format.

**Package**

UniverSC version 1.1.4
UniverSC version 1.2.1

**Maintainers**

@@ -120,7 +120,7 @@ If you wish to install `cellranger` and configure this script to run on a Linux

Note that `cellranger` installations that are pre-compiled on Linux will not run on Mac or Windows. Note that Mac OS and some Linux distributions also have different versions of sed and rename. It is possible to compile an open-source version of Cell Ranger but it is tricky to install the dependencies so we recommend using our docker [image](#Docker) if you wish to do this. 

### Beginners
### Command-line Beginners

If you are a beginner bioinformatician or wish to run this on a local computer (Mac or Windows), no problem! We provide a "docker" image containing everything needed to run it without installing the software needed. All you need to do is install [docker](https://docs.docker.com/desktop/) and follow our guide to use the [image](#Docker). This comes bundled with all the compatible versions needed to run it.

@@ -128,6 +128,13 @@ Note that you need to run the shell commands given in a unix-like command-line i

If you run into problems installing or running `launch_universc.sh` please don't hesitate to contact us via email or GitHub.

### Graphical application Users

We also provide a graphical user interface (GUI) based application to run the Docker [image](#Docker). Please install Docker as described above and pull our `tomkellygenetics/universc:latest` image using either the docker command-line interface (CLI) or the docker graphical application.

Once you have a docker image installed on your system, you can run the applicable binary available here:

[https://genomec.gsc.riken.jp/gerg/UniverSC/UniverSC_Release/](https://genomec.gsc.riken.jp/gerg/UniverSC/UniverSC_Release/)

## Purpose

@@ -346,8 +353,8 @@ as follows:

Kelly, S.T., Battenberg, K., Hetherington, N.A., Hayashi, M., and Minoda, A. (2021)
UniverSC: a flexible cross-platform single-cell data processing pipeline.
bioRxiv 2021.01.19.427209; doi: [https://doi.org/10.11.1.4021.01.19.427209](https://doi.org/10.11.1.4021.01.19.427209)
package version 1.1.4. [https://github.com/minoda-lab/universc](https://github.com/minoda-lab/universc)
bioRxiv 2021.01.19.427209; doi: [https://doi.org/10.1101/2021.01.19.427209](https://doi.org/10.1101/2021.01.19.427209)
package version 1.2.1. [https://github.com/minoda-lab/universc](https://github.com/minoda-lab/universc)

```
@article {Kelly2021.01.19.427209,
@@ -355,12 +362,12 @@ package version 1.1.4. [https://github.com/minoda-lab/universc](https://github.c
        title = {{UniverSC}: a flexible cross-platform single-cell data processing pipeline},
        elocation-id = {2021.01.19.427209},
        year = {2021},
        doi = {10.11.1.4021.01.19.427209},
        doi = {10.1101/2021.01.19.427209},
        publisher = {Cold Spring Harbor Laboratory},
        abstract = {Single-cell RNA-sequencing analysis to quantify RNA molecules in individual cells has become popular owing to the large amount of information one can obtain from each experiment. We have developed UniverSC (https://github.com/minoda-lab/universc), a universal single-cell processing tool that supports any UMI-based platform. Our command-line tool enables consistent and comprehensive integration, comparison, and evaluation across data generated from a wide range of platforms.Competing Interest StatementThe authors have declared no competing interest.},
        eprint = {https://www.biorxiv.org/content/early/2021/01/19/2021.01.19.427209.full.pdf},
        journal = {{bioRxiv}},
        note = {package version 1.1.4},
        note = {package version 1.2.1},
        URL = {https://github.com/minoda-lab/universc},
}

@@ -371,7 +378,7 @@ package version 1.1.4. [https://github.com/minoda-lab/universc](https://github.c
    title = {{UniverSC}:  a flexible cross-platform single-cell data processing pipeline},
    author = {S. Thomas Kelly, Kai Battenberg, Nicola A. Hetherington, Makoto Hayashi, and Aki Minoda},
    year = {2021},
    note = {package version 1.1.4},
    note = {package version 1.2.1},
    url = {https://github.com/minoda-lab/universc},
  }
```
@@ -824,7 +831,7 @@ your systems administrator.

#### Pulling from remote DockerHub repository

We provide a docker image for UniverSC version 1.1.4.
We provide a docker image for UniverSC version 1.2.1.

You can import it if you have docker installed.

@@ -832,6 +839,8 @@ You can import it if you have docker installed.
docker pull tomkellygenetics/universc:latest
```

#### Running processes in a docker container

Then you can run UniverSC with:

```
@@ -850,6 +859,15 @@ docker run -it tomkellygenetics/universc:latest /bin/zsh

Either of these shells are supported.

Note that Docker containers run with a default of 2 GB of memory,
which is often insufficient for running UniverSC. It is recommended
to use at least 8 GB of memory. Ideally, 16 GB of memory should
be used if it is available on your system.

 ```
docker run --memory 16g -it tomkellygenetics/universc:latest /bin/bash
```

##### Building the Docker image locally

The Dockerfile is provided in the repository so it can be built from
@@ -944,6 +962,12 @@ make reference
cd ../../..
```

#### Pre-generated References

For convenience we provide pre-generated references for the human genome and various model species available for download:

[https://genomec.gsc.riken.jp/gerg/UniverSC/Premade_references/](https://genomec.gsc.riken.jp/gerg/UniverSC/Premade_references/)

#### Custom Cell Ranger references

It is also possible to generate a custom reference for any genome provided you have
@@ -955,7 +979,7 @@ The `gffread` function includes with the [cufflinks](http://cole-trapnell-lab.gi
utility can convert to gtf. For example:

```
gffread test/cellranger_reference/cellranger-tiny-ref/genes-1.2.0.gff3 -T -o test/cellranger_reference/cellranger-tiny-ref/genes-1.2.0.gtf 
gffread test/cellranger_reference/cellranger-tiny-ref/genes-1.2.0.gff3 -T -o test/cellranger_reference/cellranger-tiny-ref/genes-1.2.0.gtf 
```

To generate new references we first remove the references imported.
@@ -968,8 +992,8 @@ We then generate references from the FASTA and GTF files as shown in the followi

```
cellranger mkref --genome=test/cellranger_reference/cellranger-tiny-ref/1.2.0 \
        --fasta=test/cellranger_reference/cellranger-tiny-ref/genome-1.2.0.fa \
        --genes=test/cellranger_reference/cellranger-tiny-ref/ genes-1.2.0.gtf
        --fasta=test/cellranger_reference/cellranger-tiny-ref/genome-1.2.0.fa \
        --genes=test/cellranger_reference/cellranger-tiny-ref/genes-1.2.0.gtf

cellranger mkref --genome=test/cellranger_reference/cellranger-tiny-ref/3.0.0 \
         --fasta=test/cellranger_reference/cellranger-tiny-ref/genome-3.0.0.fa \
@@ -1103,7 +1127,8 @@ Mandatory arguments to long options are mandatory for short options too.
  -r,  --reference DIR          Path of directory containing 10x-compatible reference.
  -t,  --technology PLATFORM    Name of technology used to generate data.
                                Supported technologies:
                                  10x Genomics (version automatically detected): 10x, chromium
                                  10x Genomics (version 2 or 3 automatically detected): 10x, chromium
                                  10x Genomics version 1 (14 bp barcode, 10 bp UMI): 10x-v1, chromium-v1
                                  10x Genomics version 2 (16 bp barcode, 10 bp UMI): 10x-v2, chromium-v2
                                  10x Genomics version 3 (16 bp barcode, 12 bp UMI): 10x-v3, chromium-v3
                                  Aligent Bravo B (16 bp barcode, No UMI): aligent, bravo
@@ -1149,6 +1174,7 @@ Mandatory arguments to long options are mandatory for short options too.

  -c,  --chemistry CHEM         Assay configuration, autodetection is not possible for converted files: SC3Pv2 (default), SC5P-PE, or SC5P-R2
                                    5′ scRNA-Seq ('SC5P-PE') is available only for 10x Genomics, ICELL8, SmartSeq, and STRT-Seq technologies
                                    Setting 'SC3Pv1' for 10x version 1 chemistry is recommended.
                                    All other technologies default to 3′ scRNA-Seq parameters. Only 10x Genomics and ICELL8 allow choosing which to use.
  -n,  --force-cells NUM        Force pipeline to use this number of cells, bypassing the cell detection algorithm.
  -j,  --jobmode MODE           Job manager to use. Valid options: local (default), sge, lsf, or a .template file
Loading