Commit 469505c2 authored by Chris Cheshire's avatar Chris Cheshire
Browse files

Merge branch 'chris-feat' into v2.0

parents a148402a 48fb5573
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -164,6 +164,9 @@ jobs:
          - verify_output_align_duplicates_remove_target
          - verify_output_peak_calling_only_peak_calling
          - test_peak_callers
          - test_conseneus_peaks_group
          - test_conseneus_peaks_all
          - test_conseneus_peaks_invalid
          - verify_output_reporting
    steps:
      - uses: actions/checkout@v2
+27 −6
Original line number Diff line number Diff line
@@ -440,15 +440,37 @@ if (params.run_mark_dups) {
========================================================================================
*/

if(params.run_peak_calling) {
if(params.run_peak_calling && (params.normalisation_mode == "Spikein" || params.normalisation_mode == "None")) {
    process {
        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:BEDTOOLS_GENOMECOV' {
        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_GENOMECOV' {
            ext.args = "-bg"
            publishDir = [
                enabled: false
            ]
        }
    }
}

        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:BEDTOOLS_SORT' {
// Applies only when peak calling is enabled and the normalisation mode is
// neither "Spikein" nor "None".
if(params.run_peak_calling && (params.normalisation_mode != "Spikein" && params.normalisation_mode != "None")) {
    process {
        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:DEEPTOOLS_BAMCOVERAGE' {
            // Command-line arguments are assembled from the pipeline params:
            // bedgraph output, the configured bin size, and the selected
            // normalisation mode passed straight through to --normalizeUsing.
            ext.args   = [
                '--outFileFormat bedgraph',
                '--skipNAs',
                "--binSize ${params.normalisation_binsize}",
                "--normalizeUsing ${params.normalisation_mode}"
            ].join(' ').trim()
            // Output prefix is the sample id followed by ".bedgraph".
            ext.prefix = { "${meta.id}.bedgraph" }
            // Outputs of this process are not published.
            publishDir = [
                enabled: false
            ]
        }
    }
}

if(params.run_peak_calling) {
    process {
        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_SORT' {
            ext.prefix = { "${meta.id}.sorted" }
            publishDir = [
                path: { "${params.outdir}/03_peak_calling/01_bam_to_bedgraph" },
@@ -458,7 +480,7 @@ if(params.run_peak_calling) {
            ]
        }

        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:UCSC_BEDCLIP' {
        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDCLIP' {
            ext.prefix = { "${meta.id}.clipped" }
            publishDir = [
                path: { "${params.outdir}/03_peak_calling/02_clip_bed" },
@@ -468,7 +490,7 @@ if(params.run_peak_calling) {
            ]
        }

        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:UCSC_BEDGRAPHTOBIGWIG' {
        withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDGRAPHTOBIGWIG' {
            publishDir = [
                path: { "${params.outdir}/03_peak_calling/03_bed_to_bigwig" },
                mode: 'copy',
@@ -542,7 +564,6 @@ if(params.run_peak_calling) {
                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
                enabled: true
            ]
            cache = false
        }

        withName: '.*:CONSENSUS_PEAKS:PLOT_CONSENSUS_PEAKS|.*:CONSENSUS_PEAKS_ALL:PLOT_CONSENSUS_PEAKS' {
+73 −0
Original line number Diff line number Diff line
%% Cell type:code id:21951c4f-eabc-49c8-8ca6-decc244f0592 tags:

``` python
import glob
import os

import pandas as pd
```

%% Output

    ---------------------------------------------------------------------------
    ModuleNotFoundError                       Traceback (most recent call last)
Input     In [7], in <module>
          1 import glob
    ----> 2 import pandas as pd
    ModuleNotFoundError: No module named 'pandas'

%% Cell type:code id:b79274d5-13e9-4c95-93a7-1fd0ef623e12 tags:

``` python
```

%% Cell type:code id:c10f7b3a-f1bd-4e2e-a1b1-236901020676 tags:

``` python
path = "/Users/cheshic/dev/test_data/cutandrun/*.bed"
```

%% Cell type:code id:5e3d36ca-cad8-42f1-a5c3-983bb07bc27a tags:

``` python
seacr_bed_list = glob.glob(path)
seacr_bed_list
```

%% Output

    ['/Users/cheshic/dev/test_data/cutandrun/h3k4me3_R2.peaks.bed.stringent.bed',
     '/Users/cheshic/dev/test_data/cutandrun/h3k4me3_R1.peaks.bed.stringent.bed',
     '/Users/cheshic/dev/test_data/cutandrun/h3k27me3_R2.peaks.bed.stringent.bed',
     '/Users/cheshic/dev/test_data/cutandrun/h3k27me3_R1.peaks.bed.stringent.bed']

%% Cell type:code id:87b50550-d7fe-4391-969d-1ae40d4e2957 tags:

``` python
# Load every SEACR peak file in `seacr_bed_list` and stack them into a single
# table (`seacr_beds`), tagging each row with the group and replicate parsed
# from the file name, e.g. "h3k4me3_R1.peaks.bed.stringent.bed".
seacr_bed_frames = []
for bed_path in seacr_bed_list:
    seacr_bed_i = pd.read_csv(
        bed_path,
        sep='\t',
        header=None,
        usecols=[0, 1, 2, 3, 4],
        names=['chrom', 'start', 'end', 'total_signal', 'max_signal'],
    )

    # Drop the four trailing dot-separated suffixes to recover the sample id.
    # Re-join on "." so that a sample id containing dots is kept intact.
    bed_base_i = os.path.basename(bed_path)
    sample_id = ".".join(bed_base_i.split(".")[:-4])

    # Everything before the last underscore is the group id (underscores
    # inside the group name are preserved); the last token is the replicate.
    group_i, _, rep_i = sample_id.rpartition("_")

    # Scalar assignment broadcasts the value to every row of the frame.
    seacr_bed_i['group'] = group_i
    seacr_bed_i['replicate'] = rep_i

    seacr_bed_frames.append(seacr_bed_i)

# DataFrame.append is deprecated (removed in pandas 2.0); concatenate once.
# The guard keeps an empty file list from raising inside pd.concat.
if seacr_bed_frames:
    seacr_beds = pd.concat(seacr_bed_frames)
```
+142 −0
Original line number Diff line number Diff line
%% Cell type:code id:21951c4f-eabc-49c8-8ca6-decc244f0592 tags:

``` python
import glob
import os
import pandas as pd
import numpy as np
```

%% Cell type:code id:b79274d5-13e9-4c95-93a7-1fd0ef623e12 tags:

``` python
seacr_beds = None
```

%% Cell type:code id:c10f7b3a-f1bd-4e2e-a1b1-236901020676 tags:

``` python
path = "/Users/cheshic/dev/test_data/cutandrun/*.bed"
```

%% Cell type:code id:5e3d36ca-cad8-42f1-a5c3-983bb07bc27a tags:

``` python
seacr_bed_list = glob.glob(path)
seacr_bed_list
```

%% Output

    ['/Users/cheshic/dev/test_data/cutandrun/h3k4me3_R2.peaks.bed.stringent.bed',
     '/Users/cheshic/dev/test_data/cutandrun/h3k4me3_R1.peaks.bed.stringent.bed',
     '/Users/cheshic/dev/test_data/cutandrun/h3k27me3_R2.peaks.bed.stringent.bed',
     '/Users/cheshic/dev/test_data/cutandrun/h3k27me3_R1.peaks.bed.stringent.bed']

%% Cell type:code id:87b50550-d7fe-4391-969d-1ae40d4e2957 tags:

``` python
# Load every SEACR peak file in `seacr_bed_list` and stack them into a single
# table (`seacr_beds`), tagging each row with the group and replicate parsed
# from the file name, e.g. "h3k4me3_R1.peaks.bed.stringent.bed".
seacr_bed_frames = []
for bed_path in seacr_bed_list:
    seacr_bed_i = pd.read_csv(
        bed_path,
        sep='\t',
        header=None,
        usecols=[0, 1, 2, 3, 4],
        names=['chrom', 'start', 'end', 'total_signal', 'max_signal'],
    )
    # Report the number of peaks read from this file.
    print(len(seacr_bed_i.index))

    # Drop the four trailing dot-separated suffixes to recover the sample id.
    # Re-join on "." so that a sample id containing dots is kept intact.
    bed_base_i = os.path.basename(bed_path)
    sample_id = ".".join(bed_base_i.split(".")[:-4])

    # Everything before the last underscore is the group id (underscores
    # inside the group name are preserved); the last token is the replicate.
    group_i, _, rep_i = sample_id.rpartition("_")

    # Scalar assignment broadcasts the value to every row of the frame.
    seacr_bed_i['group'] = group_i
    seacr_bed_i['replicate'] = rep_i

    seacr_bed_frames.append(seacr_bed_i)

# DataFrame.append is deprecated (removed in pandas 2.0); concatenate once.
# The guard keeps an empty file list from raising inside pd.concat.
if seacr_bed_frames:
    seacr_beds = pd.concat(seacr_bed_frames)
```

%% Output

    25076
    97770

    /var/folders/3p/m4wl6mfj0pq4445vvxg8c458fthlkc/T/ipykernel_48447/363799724.py:29: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
      seacr_beds = seacr_beds.append(seacr_bed_i)

    588171

    /var/folders/3p/m4wl6mfj0pq4445vvxg8c458fthlkc/T/ipykernel_48447/363799724.py:29: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
      seacr_beds = seacr_beds.append(seacr_bed_i)

    690738

    /var/folders/3p/m4wl6mfj0pq4445vvxg8c458fthlkc/T/ipykernel_48447/363799724.py:29: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
      seacr_beds = seacr_beds.append(seacr_bed_i)

%% Cell type:code id:ee8e5c87-3bb6-42f9-aa17-5361ddf600f0 tags:

``` python
seacr_beds
```

%% Output

           chrom      start        end  total_signal  max_signal     group  \
    0       chr1     633966     634046       1160.25     38.0409   h3k4me3
    1       chr1     778350     778443       1319.54     33.2858   h3k4me3
    2       chr1     778551     778681       2817.40     49.9287   h3k4me3
    3       chr1     779141     779265       1428.91     28.5307   h3k4me3
    4       chr1     827475     827527       1440.80     52.3062   h3k4me3
    ...      ...        ...        ...           ...         ...       ...
    690733  chrX  154894369  154894403       2227.27     90.9091  h3k27me3
    690734  chrX  154896407  154896438       2181.82     90.9091  h3k27me3
    690735  chrX  154980844  154980890       2272.73     90.9091  h3k27me3
    690736  chrX  155756168  155756216       2272.73     90.9091  h3k27me3
    690737  chrY   11107016   11107055       2227.27     90.9091  h3k27me3
    
           replicate
    0             R2
    1             R2
    2             R2
    3             R2
    4             R2
    ...          ...
    690733        R1
    690734        R1
    690735        R1
    690736        R1
    690737        R1
    
    [1401755 rows x 7 columns]

%% Cell type:code id:584fd72f-5ef9-46ee-91f9-971ba67354ec tags:

``` python
# Count the peaks for every (group, replicate) pair; the per-pair row count is
# stored in the 'all_peaks' column of the resulting summary table.
seacr_beds_group_rep = (
    seacr_beds[['group', 'replicate']]
    .groupby(['group', 'replicate'])
    .size()
    .reset_index(name='all_peaks')
)
seacr_beds_group_rep
```

%% Output

          group replicate  all_peaks
    0  h3k27me3        R1     690738
    1  h3k27me3        R2     588171
    2   h3k4me3        R1      97770
    3   h3k4me3        R2      25076

%% Cell type:code id:c56c7417-530d-4b03-93fd-ffdf98f2abbd tags:

``` python
```
+3 −0
Original line number Diff line number Diff line
@@ -30,6 +30,9 @@
            "custom/getchromsizes": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
            "deeptools/bamcoverage": {
                "git_sha": "fdb1664885480d9411c24ba45bb4fde4738e5907"
            },
            "deeptools/computematrix": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
Loading