Commit b950466d authored by Chris Cheshire
Browse files

Updated samplesheet processing and testing to new scheme

parent 3d5a4eb3
Loading
Loading
Loading
Loading
+16 −15
Original line number Diff line number Diff line
@@ -112,9 +112,11 @@ def check_samplesheet(file_in, file_out, use_control):
                print_error("Control entry and sample entry must be different!", "Line", line)

            ## Check replicate entry is integer
            if not replicate.isdigit() & int(replicate) > 0:
                print_error("Replicate id not an integer or is not > 0!", "Line", line)
            if not replicate.isdigit():
                print_error("Replicate id not an integer", "Line", line)
            replicate = int(replicate)
            if replicate <= 0:
                print_error("Replicate must be > 0", "Line", line)

            ## Check FastQ file extension
            for fastq in [fastq_1, fastq_2]:
@@ -170,7 +172,7 @@ def check_samplesheet(file_in, file_out, use_control):
    ## Create control identity variable
    for sample in sorted(sample_run_dict.keys()):
        for replicate in sorted(sample_run_dict[sample].keys()):
            sample_info = sample_run_dict[sample][replicate][0]
            for idx, sample_info in enumerate(sample_run_dict[sample][replicate]):
                if control_present:
                    if sample_info[0] in control_names_list:
                        sample_info.append("1")
@@ -180,14 +182,13 @@ def check_samplesheet(file_in, file_out, use_control):
                        sample_info.append("0")
                else: 
                    sample_info.append("0")
            sample_run_dict[sample][replicate][0] = sample_info

    ## Check igg_control parameter is consistent with input groups
    if (use_control == 'true' and not control_present):
        print_error("ERROR: No 'control' group was found in " + str(file_in) + " If you are not supplying a control, please specify --igg_control 'false' on command line.")
        print_error("ERROR: No 'control' group was found in " + str(file_in) + " If you are not supplying a control, please specify --use_control 'false' on command line.")
        
    if (use_control == 'false' and control_present):
        print("WARNING: Parameter --igg_control was set to false, but an 'igg' group was found in " + str(file_in) + ".")
        print("WARNING: Parameter --use_control was set to false, but an control group was found in " + str(file_in) + ".")

    ## Write validated samplesheet with appropriate columns
    if len(sample_run_dict) > 0:
@@ -202,7 +203,7 @@ def check_samplesheet(file_in, file_out, use_control):
                uniq_rep_ids = set(sample_run_dict[sample].keys())
                if len(uniq_rep_ids) != max(uniq_rep_ids):
                    print_error(
                        "Replicate ids must start with 1..<num_replicates>!",
                        "Replicate ids must start with 1!",
                        "Group",
                        sample,
                    )
+121 −56
Original line number Diff line number Diff line
@@ -28,77 +28,142 @@
    - test_samplesheet
  exit_code: 1

# Test dot in group id
- name: test_samplesheet_check_group_name_with_dot
  command: nextflow run main.nf -profile docker,test --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/sample_dot.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  files:
    - path: results/pipeline_info/samplesheet.valid.csv

# Test invalid number of columns in row
- name: test_samplesheet_invalid_cols_in_row
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/invalid_column_in_row.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# - name: test_samplesheet_check_1_1_pos
#   command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/single_sample_single_igg.csv -c tests/config/nextflow.config
#   tags:
#     - samplesheet
#     - samplesheet_1_1
#     - samplesheet_1_1_pos
#   files:
#     - path: results/pipeline_info/samplesheet.valid.csv

# - name: test_samplesheet_check_1_1_neg
#   command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/single_sample_single_igg_neg.csv -c tests/config/nextflow.config
#   tags:
#     - samplesheet
#     - samplesheet_1_1
#     - samplesheet_1_1_neg
#   exit_code: 1
# Test group is blank
- name: test_samplesheet_group_is_blank
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/group_is_blank.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# - name: test_samplesheet_check_1_2_group_match
#   command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/single_sample_single_igg_non_matching_groups.csv -c tests/config/nextflow.config
#   tags:
#     - samplesheet
#     - samplesheet_1_2
#     - samplesheet_1_2_group_match
#   exit_code: 1
# Test group has spaces
- name: test_samplesheet_group_has_spaces
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/group_has_spaces.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# - name: test_samplesheet_check_2_2_group_match_tech_rep
#   command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/single_sample_single_igg_non_matching_groups_tech_rep.csv -c tests/config/nextflow.config
#   tags:
#     - samplesheet
#     - samplesheet_2_2
#     - samplesheet-2_2_group_match_tech_rep
#   exit_code: 1
# Test control has spaces
- name: test_samplesheet_control_has_spaces
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/control_has_spaces.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# - name: test_samplesheet_check_multiple_sample_single_igg
#   command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/multiple_sample_single_igg.csv -c tests/config/nextflow.config
#   tags:
#     - samplesheet
#     - samplesheet_M_1
#   files:
#     - path: results/pipeline_info/samplesheet.valid.csv
# Test group equals control
- name: test_samplesheet_group_equals_control
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/group_equals_control.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# - name: test_samplesheet_check_multiple_sample_multiple_igg
#   command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/multiple_sample_multiple_igg.csv -c tests/config/nextflow.config
#   tags:
#     - samplesheet
#     - samplesheet_M_M
#   files:
#     - path: results/pipeline_info/samplesheet.valid.csv
# Test negative replicate number
- name: test_samplesheet_neg_rep
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/negative_rep_num.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# - name: test_samplesheet_check_group_name_with_dot
#   command: nextflow run main.nf -profile docker,test --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/test-GSE145187-small-dot.csv -c tests/config/nextflow.config
#   tags:
#     - samplesheet_group_dot
#   files:
#     - path: results/pipeline_info/samplesheet.valid.csv
# Test replicate number does not start at 1
- name: test_samplesheet_rep_not_start_one
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/rep_not_start_one.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# Test incorrect fastq file extension
- name: test_samplesheet_fastq_ext_error
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/fastq_ext_error.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# Test spaces in fastq file path
- name: test_samplesheet_fastq_path_spaces_error
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/fastq_file_spaces.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# Test duplicate rows
- name: test_samplesheet_dup_rows
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/duplicate_rows.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# Test control does not exist
- name: test_samplesheet_ctrl_not_exist
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/control_not_exist.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# Test control with control
- name: test_samplesheet_ctrl_with_ctrl
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/control_with_control.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  exit_code: 1

# Test small sample sheet
- name: test_samplesheet_small
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/test-GSE145187-small.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  files:
    - path: results/pipeline_info/samplesheet.valid.csv

# Test small samplesheet with technical replicates
- name: test_samplesheet_small_tech_reps
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/test-GSE145187-small-tech-reps.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  files:
    - path: results/pipeline_info/samplesheet.valid.csv

# Test small samplesheet without IgG control (--use_control false)
- name: test_samplesheet_small_noigg
  command: nextflow run main.nf -profile docker,test --only_input true --use_control false --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/test-GSE145187-noigg-small.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  files:
    - path: results/pipeline_info/samplesheet.valid.csv

# Test all
- name: test_samplesheet_all
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/test-GSE145187-all.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  files:
    - path: results/pipeline_info/samplesheet.valid.csv

# Test all small
- name: test_samplesheet_all_small
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/test-GSE145187-all-small.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  files:
    - path: results/pipeline_info/samplesheet.valid.csv

# - name: test_samplesheet_check_test_id_1
#   command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/unit_tests/rep_id_start_1.csv -c tests/config/nextflow.config
#   tags:
#     - samplesheet
#     - samplesheet_start_1
#   exit_code: 1
 No newline at end of file
# Test all multi-rep
- name: test_samplesheet_all_multi_rep
  command: nextflow run main.nf -profile docker,test --only_input true --input https://raw.githubusercontent.com/luslab/test-datasets/cutandrun/samplesheet/test-GSE145187-all-multi-rep.csv -c tests/config/nextflow.config
  tags:
    - test_samplesheet
  files:
    - path: results/pipeline_info/samplesheet.valid.csv