Commit 326dc474 authored by tekath's avatar tekath
Browse files

Test with first vignette snippet.

parent e4668cbb
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ navbar:
      href: reference/index.html
    - text: Articles
      menu:
      - text: Vignette human bulk RNA-seq
        href: articles/mkin.html
      - text: Vignette human single-cell RNA-seq preprocessing
        href: articles/Hoffman_human_single-cell_preprocess.html
    - text: News
      href: news/index.html
+1 −1
Original line number Diff line number Diff line
@@ -88,7 +88,7 @@
  </a>
  <ul class="dropdown-menu" role="menu">
    <li>
      <a href="articles/mkin.html">Vignette human bulk RNA-seq</a>
      <a href="articles/Hoffman_human_single-cell_preprocess.html">Vignette human single-cell RNA-seq preprocessing</a>
    </li>
  </ul>
</li>
+363 −0
Original line number Diff line number Diff line
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Preprocessing of the Hoffman et al. human single-cell RNA-seq data • DTUrtle</title>
<!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous">
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="../bootstrap-toc.css">
<script src="../bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
<!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script><meta property="og:title" content="Preprocessing of the Hoffman et al. human single-cell RNA-seq data">
<meta property="og:description" content="DTUrtle">
<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body data-spy="scroll" data-target="#toc">
    <div class="container template-article">
      <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
  <div class="container">
    <div class="navbar-header">
      <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
        <span class="sr-only">Toggle navigation</span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
        <span class="icon-bar"></span>
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">DTUrtle</a>
        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.1.0</span>
      </span>
    </div>

    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
<li>
  <a href="../reference/index.html">Functions and data</a>
</li>
<li class="dropdown">
  <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
    Articles
     
    <span class="caret"></span>
  </a>
  <ul class="dropdown-menu" role="menu">
<li>
      <a href="../articles/Hoffman_human_single-cell_preprocess.html">Vignette human single-cell RNA-seq preprocessing</a>
    </li>
  </ul>
</li>
<li>
  <a href="../news/index.html">News</a>
</li>
      </ul>
<ul class="nav navbar-nav navbar-right"></ul>
</div>
<!--/.nav-collapse -->
  </div>
<!--/.container -->
</div>
<!--/.navbar -->

      

      </header><div class="row">
  <div class="col-md-9 contents">
    <div class="page-header toc-ignore">
      <h1 data-toc-skip>Preprocessing of the Hoffman et al. human single-cell RNA-seq data</h1>
                        <h4 class="author">Tobias Tekath</h4>
            
            <h4 class="date">2020-06-30</h4>
      
      
      <div class="hidden name"><code>Hoffman_human_single-cell_preprocess.Rmd</code></div>

    </div>

    
    
<p>This vignette exemplifies the preprocessing of <strong>single-cell RNA-seq data for analysis with DTUrtle</strong>. The data used in this vignette is publicly available as <em>Bioproject PRJNA594939</em> and the used <em>FASTQ</em>-files can be downloaded from <a href="https://www.ebi.ac.uk/ena/browser/view/PRJNA594939">here</a>. The corresponding publication from Hoffman et al. to this dataset can be found <a href="https://doi.org/10.1038/s42003-020-0837-0">here</a>.</p>
<p>For this vignette we focus on just two of the available single-cell RNA-seq samples:</p>
<ul>
<li>
<a href="https://www.ebi.ac.uk/ena/browser/view/SAMN13541133">SAMN13541133</a> which represents a control sample (18 hour treatment with ethanol (EtOH))</li>
<li>
<a href="https://www.ebi.ac.uk/ena/browser/view/SAMN13541131">SAMN13541131</a> which represents a sample after 2 hours of Dexamethasone treatment (Dex2hr)</li>
</ul>
<p>The <em>FASTQ</em>-files can be directly obtained from ENA, alternatively they are also available as <em>SRA</em>-files from <a href="https://www.ncbi.nlm.nih.gov/bioproject/PRJNA594939">GEO</a>, which can be converted to <em>FASTQ</em>-format.</p>
<p>After downloading, an <strong>MD5-check</strong> is strongly encouraged.</p>
<div id="preparing-fastq-files" class="section level3">
<h3 class="hasAnchor">
<a href="#preparing-fastq-files" class="anchor"></a>Preparing FASTQ-files</h3>
<p>For this vignette, it is assumed that the <em>FASTQ</em>-files mentioned above have been downloaded to a directory called <em>samples</em>.</p>
<p>After downloading the <em>FASTQ</em>-files, the first step is to rename the files and split them into two files each. This dataset was produced using an Illumina SureCell 3’ kit, which produces paired-end reads (as all current single-cell protocols). For unknown reasons the original two <em>FASTQ</em>-files per sample have been appended for the upload. More information about the SureCell format can be found <a href="https://teichlab.github.io/scg_lib_structs/methods_html/SureCell.html">here</a>.</p>
<p>The read files can be split with the following <em>bash</em> commands:</p>
<!-- Bash snippet cb1: renames the downloaded FASTQ files, unpacks them, and splits each one into two read files named sc_SAMPLE_1.fastq and sc_SAMPLE_2.fastq. A one-character numeric suffix and a plain .fastq extension are used because the split output is uncompressed and the following snippet reads sc_SAMPLE_2.fastq and gzips sc_*_1.fastq. -->
<div class="sourceCode" id="cb1"><pre class="sourceCode bash"><code class="sourceCode bash"><a class="sourceLine" id="cb1-1" data-line-number="1"><span class="bu">cd</span> <span class="st">'YOUR_PATH'</span>/samples</a>
<a class="sourceLine" id="cb1-2" data-line-number="2"></a>
<a class="sourceLine" id="cb1-3" data-line-number="3"><span class="co">#rename</span></a>
<a class="sourceLine" id="cb1-4" data-line-number="4"><span class="fu">mv</span> SRR10669459.fastq.gz sc_EtOH.fastq.gz</a>
<a class="sourceLine" id="cb1-5" data-line-number="5"><span class="fu">mv</span> SRR10669461.fastq.gz sc_Dex2hr.fastq.gz</a>
<a class="sourceLine" id="cb1-6" data-line-number="6"></a>
<a class="sourceLine" id="cb1-7" data-line-number="7"><span class="co">#unpack the files</span></a>
<a class="sourceLine" id="cb1-8" data-line-number="8"><span class="fu">gunzip</span> sc_*.fastq.gz </a>
<a class="sourceLine" id="cb1-9" data-line-number="9"></a>
<a class="sourceLine" id="cb1-10" data-line-number="10"><span class="co">#split</span></a>
<a class="sourceLine" id="cb1-11" data-line-number="11"><span class="co"># the line numbers can be computed grepping the first linker sequence </span></a>
<a class="sourceLine" id="cb1-12" data-line-number="12"><span class="co"># or counting the number of '1'  at the end of the read name.</span></a>
<a class="sourceLine" id="cb1-13" data-line-number="13"><span class="fu">split</span> -l 1215291200 --numeric-suffixes=1 -a 1 --additional-suffix=.fastq sc_EtOH.fastq sc_EtOH_</a>
<a class="sourceLine" id="cb1-14" data-line-number="14"><span class="fu">split</span> -l 1394785580 --numeric-suffixes=1 -a 1 --additional-suffix=.fastq sc_Dex2hr.fastq sc_Dex2hr_</a></code></pre></div>
<p>Another drawback of the concatenation of the files for the upload is that the read names do not match between the samples. We can also overcome this with the following <em>bash</em> commands:</p>
<!-- Bash snippet cb2: for the second read file of each sample, sed rewrites every line that starts with the run accession prefix by subtracting a fixed per-sample offset (303822800 for EtOH, 348696395 for Dex2hr) from the number that follows it, using the GNU sed "e" flag to evaluate the arithmetic via echo. The results are written to *_matched.fastq files, which are gzipped together with the *_1.fastq files; the remaining rm commands are optional cleanup. NOTE(review): the regular expression presumably relies on the read header containing a space after the read number; confirm against the actual FASTQ headers. -->
<div class="sourceCode" id="cb2"><pre class="sourceCode bash"><code class="sourceCode bash"><a class="sourceLine" id="cb2-1" data-line-number="1"><span class="co">#create files with matching names</span></a>
<a class="sourceLine" id="cb2-2" data-line-number="2"><span class="fu">cat</span> sc_EtOH_2.fastq <span class="kw">|</span> <span class="fu">sed</span> -e <span class="st">'s/^\(@SRR10669459.\)\(.*\) /echo \1$((\2-303822800)) /e'</span> <span class="op">&gt;</span> sc_EtOH_2_matched.fastq</a>
<a class="sourceLine" id="cb2-3" data-line-number="3"><span class="fu">cat</span> sc_Dex2hr_2.fastq <span class="kw">|</span> <span class="fu">sed</span> -e <span class="st">'s/^\(@SRR10669461.\)\(.*\) /echo \1$((\2-348696395)) /e'</span> <span class="op">&gt;</span> sc_Dex2hr_2_matched.fastq</a>
<a class="sourceLine" id="cb2-4" data-line-number="4"></a>
<a class="sourceLine" id="cb2-5" data-line-number="5"><span class="co">#pack again</span></a>
<a class="sourceLine" id="cb2-6" data-line-number="6"><span class="fu">gzip</span> sc_*_1.fastq</a>
<a class="sourceLine" id="cb2-7" data-line-number="7"><span class="fu">gzip</span> sc_*_matched.fastq</a>
<a class="sourceLine" id="cb2-8" data-line-number="8"></a>
<a class="sourceLine" id="cb2-9" data-line-number="9"><span class="co">#optionally: remove temporary files</span></a>
<a class="sourceLine" id="cb2-10" data-line-number="10"><span class="fu">rm</span> *.fastq</a>
<a class="sourceLine" id="cb2-11" data-line-number="11"></a>
<a class="sourceLine" id="cb2-12" data-line-number="12"><span class="co">#optionally: remove concatenated files</span></a>
<a class="sourceLine" id="cb2-13" data-line-number="13"><span class="fu">rm</span> sc_EtOH.fastq.gz </a>
<a class="sourceLine" id="cb2-14" data-line-number="14"><span class="fu">rm</span> sc_Dex2hr.fastq.gz</a></code></pre></div>
<p>There should be four packed <em>FASTQ</em>-files present in the folder, two per sample.</p>
<p>Vignettes are long form documentation commonly included in packages. Because they are part of the distribution of the package, they need to be as compact as possible. The <code>html_vignette</code> output type provides a custom style sheet (and tweaks some options) to ensure that the resulting html is as small as possible. The <code>html_vignette</code> format:</p>
<ul>
<li>Never uses retina figures</li>
<li>Has a smaller default figure size</li>
<li>Uses a custom CSS stylesheet instead of the default Twitter Bootstrap style</li>
</ul>
</div>
<div id="vignette-info" class="section level2">
<h2 class="hasAnchor">
<a href="#vignette-info" class="anchor"></a>Vignette Info</h2>
<p>Note the various macros within the <code>vignette</code> section of the metadata block above. These are required in order to instruct R how to build the vignette. Note that you should change the <code>title</code> field and the <code>\VignetteIndexEntry</code> to match the title of your vignette.</p>
</div>
<div id="styles" class="section level2">
<h2 class="hasAnchor">
<a href="#styles" class="anchor"></a>Styles</h2>
<p>The <code>html_vignette</code> template includes a basic CSS theme. To override this theme you can specify your own CSS in the document metadata as follows:</p>
<pre><code>output: 
  rmarkdown::html_vignette:
    css: mystyles.css</code></pre>
</div>
<div id="figures" class="section level2">
<h2 class="hasAnchor">
<a href="#figures" class="anchor"></a>Figures</h2>
<p>The figure sizes have been customised so that you can easily put two images side-by-side.</p>
<div class="sourceCode" id="cb4"><pre class="r"><span class="fu"><a href="https://rdrr.io/r/graphics/plot.html">plot</a></span>(<span class="fl">1</span>:<span class="fl">10</span>)
<span class="fu"><a href="https://rdrr.io/r/graphics/plot.html">plot</a></span>(<span class="fl">10</span>:<span class="fl">1</span>)</pre></div>
<p><img src="Hoffman_human_single-cell_preprocess_files/figure-html/unnamed-chunk-3-1.png" alt="Plot of the values 1 to 10" width="700"><img src="Hoffman_human_single-cell_preprocess_files/figure-html/unnamed-chunk-3-2.png" alt="Plot of the values 10 to 1" width="700"></p>
<p>You can enable figure captions by <code>fig_caption: yes</code> in YAML:</p>
<pre><code>output:
  rmarkdown::html_vignette:
    fig_caption: yes</code></pre>
<p>Then you can use the chunk option <code>fig.cap = "Your figure caption."</code> in <strong>knitr</strong>.</p>
</div>
<div id="more-examples" class="section level2">
<h2 class="hasAnchor">
<a href="#more-examples" class="anchor"></a>More Examples</h2>
<p>You can write math expressions, e.g. <span class="math inline">\(Y = X\beta + \epsilon\)</span>, footnotes<a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a>, and tables, e.g. using <code><a href="https://rdrr.io/pkg/knitr/man/kable.html">knitr::kable()</a></code>.</p>
<table class="table">
<thead><tr class="header">
<th></th>
<th align="right">mpg</th>
<th align="right">cyl</th>
<th align="right">disp</th>
<th align="right">hp</th>
<th align="right">drat</th>
<th align="right">wt</th>
<th align="right">qsec</th>
<th align="right">vs</th>
<th align="right">am</th>
<th align="right">gear</th>
<th align="right">carb</th>
</tr></thead>
<tbody>
<tr class="odd">
<td>Mazda RX4</td>
<td align="right">21.0</td>
<td align="right">6</td>
<td align="right">160.0</td>
<td align="right">110</td>
<td align="right">3.90</td>
<td align="right">2.620</td>
<td align="right">16.46</td>
<td align="right">0</td>
<td align="right">1</td>
<td align="right">4</td>
<td align="right">4</td>
</tr>
<tr class="even">
<td>Mazda RX4 Wag</td>
<td align="right">21.0</td>
<td align="right">6</td>
<td align="right">160.0</td>
<td align="right">110</td>
<td align="right">3.90</td>
<td align="right">2.875</td>
<td align="right">17.02</td>
<td align="right">0</td>
<td align="right">1</td>
<td align="right">4</td>
<td align="right">4</td>
</tr>
<tr class="odd">
<td>Datsun 710</td>
<td align="right">22.8</td>
<td align="right">4</td>
<td align="right">108.0</td>
<td align="right">93</td>
<td align="right">3.85</td>
<td align="right">2.320</td>
<td align="right">18.61</td>
<td align="right">1</td>
<td align="right">1</td>
<td align="right">4</td>
<td align="right">1</td>
</tr>
<tr class="even">
<td>Hornet 4 Drive</td>
<td align="right">21.4</td>
<td align="right">6</td>
<td align="right">258.0</td>
<td align="right">110</td>
<td align="right">3.08</td>
<td align="right">3.215</td>
<td align="right">19.44</td>
<td align="right">1</td>
<td align="right">0</td>
<td align="right">3</td>
<td align="right">1</td>
</tr>
<tr class="odd">
<td>Hornet Sportabout</td>
<td align="right">18.7</td>
<td align="right">8</td>
<td align="right">360.0</td>
<td align="right">175</td>
<td align="right">3.15</td>
<td align="right">3.440</td>
<td align="right">17.02</td>
<td align="right">0</td>
<td align="right">0</td>
<td align="right">3</td>
<td align="right">2</td>
</tr>
<tr class="even">
<td>Valiant</td>
<td align="right">18.1</td>
<td align="right">6</td>
<td align="right">225.0</td>
<td align="right">105</td>
<td align="right">2.76</td>
<td align="right">3.460</td>
<td align="right">20.22</td>
<td align="right">1</td>
<td align="right">0</td>
<td align="right">3</td>
<td align="right">1</td>
</tr>
<tr class="odd">
<td>Duster 360</td>
<td align="right">14.3</td>
<td align="right">8</td>
<td align="right">360.0</td>
<td align="right">245</td>
<td align="right">3.21</td>
<td align="right">3.570</td>
<td align="right">15.84</td>
<td align="right">0</td>
<td align="right">0</td>
<td align="right">3</td>
<td align="right">4</td>
</tr>
<tr class="even">
<td>Merc 240D</td>
<td align="right">24.4</td>
<td align="right">4</td>
<td align="right">146.7</td>
<td align="right">62</td>
<td align="right">3.69</td>
<td align="right">3.190</td>
<td align="right">20.00</td>
<td align="right">1</td>
<td align="right">0</td>
<td align="right">4</td>
<td align="right">2</td>
</tr>
<tr class="odd">
<td>Merc 230</td>
<td align="right">22.8</td>
<td align="right">4</td>
<td align="right">140.8</td>
<td align="right">95</td>
<td align="right">3.92</td>
<td align="right">3.150</td>
<td align="right">22.90</td>
<td align="right">1</td>
<td align="right">0</td>
<td align="right">4</td>
<td align="right">2</td>
</tr>
<tr class="even">
<td>Merc 280</td>
<td align="right">19.2</td>
<td align="right">6</td>
<td align="right">167.6</td>
<td align="right">123</td>
<td align="right">3.92</td>
<td align="right">3.440</td>
<td align="right">18.30</td>
<td align="right">1</td>
<td align="right">0</td>
<td align="right">4</td>
<td align="right">4</td>
</tr>
</tbody>
</table>
<p>Also a quote using <code><a href="https://rdrr.io/r/base/Comparison.html">&gt;</a></code>:</p>
<blockquote>
<p>“He who gives up [code] safety for [code] speed deserves neither.” (<a href="https://twitter.com/hadleywickham/status/504368538874703872">via</a>)</p>
</blockquote>
</div>
<div class="footnotes">
<hr>
<ol>
<li id="fn1"><p>A footnote here.<a href="#fnref1" class="footnote-back"></a></p></li>
</ol>
</div>
  </div>

  <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">

        <nav id="toc" data-toggle="toc"><h2 data-toc-skip>Contents</h2>
    </nav>
</div>

</div>



      <footer><div class="copyright">
  <p>Developed by <a href="https://hesselberthlab.org">Tobias Tekath</a>.</p>
</div>

<div class="pkgdown">
  <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
</div>

      </footer>
</div>

  


  </body>
</html>
Loading