Merge branch 'chris-feat' into v2.0 (469505c2) · Commits · Chaos / Cutandrun

.github/workflows/ci.yml

+3 −0

Original line number	Diff line number	Diff line
		@@ -164,6 +164,9 @@ jobs:
		- verify_output_align_duplicates_remove_target
		- verify_output_peak_calling_only_peak_calling
		- test_peak_callers
		- test_conseneus_peaks_group
		- test_conseneus_peaks_all
		- test_conseneus_peaks_invalid
		- verify_output_reporting
		steps:
		- uses: actions/checkout@v2

conf/modules.config

+27 −6

Original line number	Diff line number	Diff line
		@@ -440,15 +440,37 @@ if (params.run_mark_dups) {
		========================================================================================
		*/

		if(params.run_peak_calling) {
		if(params.run_peak_calling && (params.normalisation_mode == "Spikein" \|\| params.normalisation_mode == "None")) {
		process {
		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:BEDTOOLS_GENOMECOV' {
		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_GENOMECOV' {
		ext.args = "-bg"
		publishDir = [
		enabled: false
		]
		}
		}
		}

		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:BEDTOOLS_SORT' {
		// Applies only when peak calling is enabled and normalisation_mode is
		// neither "Spikein" nor "None".
		if(params.run_peak_calling && (params.normalisation_mode != "Spikein" && params.normalisation_mode != "None")) {
		process {
		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:DEEPTOOLS_BAMCOVERAGE' {
		// The option list is joined into one space-separated string; the bin
		// size and normalisation method are read from the pipeline params.
		ext.args = [
		'--outFileFormat bedgraph',
		'--skipNAs',
		"--binSize ${params.normalisation_binsize}",
		"--normalizeUsing ${params.normalisation_mode}"
		].join(' ').trim()
		// Closure yielding "<meta.id>.bedgraph" (presumably used by the module
		// to name its output -- confirm against the module script).
		ext.prefix = { "${meta.id}.bedgraph" }
		// Publishing is switched off for this process.
		publishDir = [
		enabled: false
		]
		}
		}
		}

		if(params.run_peak_calling) {
		process {
		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:BEDTOOLS_SORT' {
		ext.prefix = { "${meta.id}.sorted" }
		publishDir = [
		path: { "${params.outdir}/03_peak_calling/01_bam_to_bedgraph" },
		@@ -458,7 +480,7 @@ if(params.run_peak_calling) {
		]
		}

		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:UCSC_BEDCLIP' {
		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDCLIP' {
		ext.prefix = { "${meta.id}.clipped" }
		publishDir = [
		path: { "${params.outdir}/03_peak_calling/02_clip_bed" },
		@@ -468,7 +490,7 @@ if(params.run_peak_calling) {
		]
		}

		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:UCSC_BEDGRAPHTOBIGWIG' {
		withName: 'NFCORE_CUTANDRUN:CUTANDRUN:PREPARE_PEAKCALLING:UCSC_BEDGRAPHTOBIGWIG' {
		publishDir = [
		path: { "${params.outdir}/03_peak_calling/03_bed_to_bigwig" },
		mode: 'copy',
		@@ -542,7 +564,6 @@ if(params.run_peak_calling) {
		saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
		enabled: true
		]
		cache = false
		}

		withName: '.:CONSENSUS_PEAKS:PLOT_CONSENSUS_PEAKS\|.:CONSENSUS_PEAKS_ALL:PLOT_CONSENSUS_PEAKS' {

dev/docker/static_reports/jupyter/.ipynb_checkpoints/test_peak_count-checkpoint.ipynb

0 → 100644

+73 −0

Original line number	Diff line number	Diff line
		%% Cell type:code id:21951c4f-eabc-49c8-8ca6-decc244f0592 tags:

		``` python
		import glob
		import pandas as pd
		```

		%% Output

		---------------------------------------------------------------------------
		ModuleNotFoundError Traceback (most recent call last)
		Input In [7], in <module>
		1 import glob
		----> 2 import pandas as pd
		ModuleNotFoundError: No module named 'pandas'

		%% Cell type:code id:b79274d5-13e9-4c95-93a7-1fd0ef623e12 tags:

		``` python
		```

		%% Cell type:code id:c10f7b3a-f1bd-4e2e-a1b1-236901020676 tags:

		``` python
		path = "/Users/cheshic/dev/test_data/cutandrun/*.bed"
		```

		%% Cell type:code id:5e3d36ca-cad8-42f1-a5c3-983bb07bc27a tags:

		``` python
		seacr_bed_list = glob.glob(path)
		seacr_bed_list
		```

		%% Output

		['/Users/cheshic/dev/test_data/cutandrun/h3k4me3_R2.peaks.bed.stringent.bed',
		'/Users/cheshic/dev/test_data/cutandrun/h3k4me3_R1.peaks.bed.stringent.bed',
		'/Users/cheshic/dev/test_data/cutandrun/h3k27me3_R2.peaks.bed.stringent.bed',
		'/Users/cheshic/dev/test_data/cutandrun/h3k27me3_R1.peaks.bed.stringent.bed']

		%% Cell type:code id:87b50550-d7fe-4391-969d-1ae40d4e2957 tags:

		``` python
		# Load every SEACR peak file, tag its rows with the group and replicate
		# parsed from the file name, and stack the per-file tables into seacr_beds.
		import os  # imported here because this notebook's import cell lacks it

		seacr_frames = []
		for bed_path in seacr_bed_list:
		    seacr_bed_i = pd.read_csv(
		        bed_path,
		        sep='\t',
		        header=None,
		        usecols=[0, 1, 2, 3, 4],
		        names=['chrom', 'start', 'end', 'total_signal', 'max_signal'],
		    )

		    # Split the file name on dots and drop the last four fields; what
		    # remains is the sample id.
		    # NOTE(review): the pieces are joined with no separator, so a sample
		    # id that itself contains dots loses them -- confirm this is intended.
		    bed_id_list = os.path.basename(bed_path).split(".")
		    sample_id = "".join(bed_id_list[0:-4])

		    # Split the sample id on underscores: the last field is the replicate,
		    # everything before it is the group.
		    # NOTE(review): group fields are also joined with no separator, so a
		    # group name containing underscores loses them -- confirm.
		    sample_id_split_list = sample_id.split("_")
		    group_i = "".join(sample_id_split_list[0:-1])
		    rep_i = sample_id_split_list[-1]

		    # A scalar is broadcast to every row of the frame.
		    seacr_bed_i['group'] = group_i
		    seacr_bed_i['replicate'] = rep_i

		    seacr_frames.append(seacr_bed_i)

		# This is a top-level notebook cell, so there is no `self`; the result is a
		# plain variable. One concat replaces the deprecated DataFrame.append and
		# keeps each file's own row labels, as append did. With no input files the
		# result is None.
		seacr_beds = pd.concat(seacr_frames) if seacr_frames else None
		```

dev/docker/static_reports/jupyter/test_peak_count.ipynb

0 → 100644

+142 −0

Original line number	Diff line number	Diff line
		%% Cell type:code id:21951c4f-eabc-49c8-8ca6-decc244f0592 tags:

		``` python
		import glob
		import os
		import pandas as pd
		import numpy as np
		```

		%% Cell type:code id:b79274d5-13e9-4c95-93a7-1fd0ef623e12 tags:

		``` python
		seacr_beds = None
		```

		%% Cell type:code id:c10f7b3a-f1bd-4e2e-a1b1-236901020676 tags:

		``` python
		path = "/Users/cheshic/dev/test_data/cutandrun/*.bed"
		```

		%% Cell type:code id:5e3d36ca-cad8-42f1-a5c3-983bb07bc27a tags:

		``` python
		seacr_bed_list = glob.glob(path)
		seacr_bed_list
		```

		%% Output

		['/Users/cheshic/dev/test_data/cutandrun/h3k4me3_R2.peaks.bed.stringent.bed',
		'/Users/cheshic/dev/test_data/cutandrun/h3k4me3_R1.peaks.bed.stringent.bed',
		'/Users/cheshic/dev/test_data/cutandrun/h3k27me3_R2.peaks.bed.stringent.bed',
		'/Users/cheshic/dev/test_data/cutandrun/h3k27me3_R1.peaks.bed.stringent.bed']

		%% Cell type:code id:87b50550-d7fe-4391-969d-1ae40d4e2957 tags:

		``` python
		# Load every SEACR peak file, tag its rows with the group and replicate
		# parsed from the file name, and stack the per-file tables into seacr_beds.
		seacr_frames = []
		for bed_path in seacr_bed_list:
		    seacr_bed_i = pd.read_csv(
		        bed_path,
		        sep='\t',
		        header=None,
		        usecols=[0, 1, 2, 3, 4],
		        names=['chrom', 'start', 'end', 'total_signal', 'max_signal'],
		    )
		    # Report the number of peaks read from this file.
		    print(len(seacr_bed_i.index))

		    # Split the file name on dots and drop the last four fields; what
		    # remains is the sample id.
		    # NOTE(review): the pieces are joined with no separator, so a sample
		    # id that itself contains dots loses them -- confirm this is intended.
		    bed_id_list = os.path.basename(bed_path).split(".")
		    sample_id = "".join(bed_id_list[0:-4])

		    # Split the sample id on underscores: the last field is the replicate,
		    # everything before it is the group.
		    # NOTE(review): group fields are also joined with no separator, so a
		    # group name containing underscores loses them -- confirm.
		    sample_id_split_list = sample_id.split("_")
		    group_i = "".join(sample_id_split_list[0:-1])
		    rep_i = sample_id_split_list[-1]

		    # A scalar is broadcast to every row of the frame.
		    seacr_bed_i['group'] = group_i
		    seacr_bed_i['replicate'] = rep_i

		    seacr_frames.append(seacr_bed_i)

		# One concat replaces the per-iteration DataFrame.append, which is
		# deprecated and re-copies the accumulated table on every pass. Each file's
		# own row labels are kept, as append did. With no input files the result
		# is None.
		seacr_beds = pd.concat(seacr_frames) if seacr_frames else None
		```

		%% Output

		25076
		97770

		/var/folders/3p/m4wl6mfj0pq4445vvxg8c458fthlkc/T/ipykernel_48447/363799724.py:29: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
		seacr_beds = seacr_beds.append(seacr_bed_i)

		588171

		/var/folders/3p/m4wl6mfj0pq4445vvxg8c458fthlkc/T/ipykernel_48447/363799724.py:29: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
		seacr_beds = seacr_beds.append(seacr_bed_i)

		690738

		/var/folders/3p/m4wl6mfj0pq4445vvxg8c458fthlkc/T/ipykernel_48447/363799724.py:29: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
		seacr_beds = seacr_beds.append(seacr_bed_i)

		%% Cell type:code id:ee8e5c87-3bb6-42f9-aa17-5361ddf600f0 tags:

		``` python
		seacr_beds
		```

		%% Output

		chrom start end total_signal max_signal group \
		0 chr1 633966 634046 1160.25 38.0409 h3k4me3
		1 chr1 778350 778443 1319.54 33.2858 h3k4me3
		2 chr1 778551 778681 2817.40 49.9287 h3k4me3
		3 chr1 779141 779265 1428.91 28.5307 h3k4me3
		4 chr1 827475 827527 1440.80 52.3062 h3k4me3
		... ... ... ... ... ... ...
		690733 chrX 154894369 154894403 2227.27 90.9091 h3k27me3
		690734 chrX 154896407 154896438 2181.82 90.9091 h3k27me3
		690735 chrX 154980844 154980890 2272.73 90.9091 h3k27me3
		690736 chrX 155756168 155756216 2272.73 90.9091 h3k27me3
		690737 chrY 11107016 11107055 2227.27 90.9091 h3k27me3

		replicate
		0 R2
		1 R2
		2 R2
		3 R2
		4 R2
		... ...
		690733 R1
		690734 R1
		690735 R1
		690736 R1
		690737 R1

		[1401755 rows x 7 columns]

		%% Cell type:code id:584fd72f-5ef9-46ee-91f9-971ba67354ec tags:

		``` python
		# Count the peaks in each (group, replicate) pair, then display the table.
		peaks_per_replicate = seacr_beds[['group', 'replicate']].groupby(['group', 'replicate']).size()
		seacr_beds_group_rep = peaks_per_replicate.reset_index(name='all_peaks')
		seacr_beds_group_rep
		```

		%% Output

		group replicate all_peaks
		0 h3k27me3 R1 690738
		1 h3k27me3 R2 588171
		2 h3k4me3 R1 97770
		3 h3k4me3 R2 25076

		%% Cell type:code id:c56c7417-530d-4b03-93fd-ffdf98f2abbd tags:

		``` python
		```

modules.json

+3 −0

Original line number	Diff line number	Diff line
		@@ -30,6 +30,9 @@
		"custom/getchromsizes": {
		"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
		},
		"deeptools/bamcoverage": {
		"git_sha": "fdb1664885480d9411c24ba45bb4fde4738e5907"
		},
		"deeptools/computematrix": {
		"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
		},

Admin message