Commit eeee74c4 authored by Li's avatar Li Committed by Haowen Zhang
Browse files

Check whether the barcode whitelist matches the data, and add an option (--skip-barcode-check) to skip this check.

parent 9a2e3f17
Loading
Loading
Loading
Loading
+17 −3
Original line number Diff line number Diff line
@@ -749,6 +749,13 @@ void Chromap<MappingRecord>::ComputeBarcodeAbundance(
          ++num_sample_barcodes_;
        }
      }

			if (!skip_barcode_check_  
					&& num_sample_barcodes_ * 20 < num_loaded_barcodes) {
				// Since num_loaded_pairs is a constant, this check is actually only effective in the first iteration
				Chromap<>::ExitWithMessage("Less than 5\% barcodes can be found or corrected based on the barcode whitelist.\nPlease check whether the barcode whitelist matches the data, e.g. length, reverse-complement. If this is a false positive warning, please run Chromap with the option --skip-barcode-check.");
			}

      if (num_sample_barcodes_ >= max_num_sample_barcodes) {
        break;
      }
@@ -759,6 +766,7 @@ void Chromap<MappingRecord>::ComputeBarcodeAbundance(
      break;
    }
  }

  std::cerr << "Compute barcode abundance using " << num_sample_barcodes_
            << " in " << Chromap<>::GetRealTime() - real_start_time << "s.\n";
}
@@ -5968,8 +5976,8 @@ void ChromapDriver::ParseArgsAndRun(int argc, char *argv[]) {
      //("p,matrix-output-prefix", "Prefix of matrix output files",
      // cxxopts::value<std::string>(), "FILE")
      ("output-mappings-not-in-whitelist",
       "Output mappings with barcode not in the whitelist")(
          "chr-order", "custom chromsome order", cxxopts::value<std::string>(),
       "Output mappings with barcode not in the whitelist")
			("chr-order", "custom chromsome order", cxxopts::value<std::string>(),
          "FILE")("BED", "Output mappings in BED/BEDPE format")(
          "TagAlign", "Output mappings in TagAlign/PairedTagAlign format")(
          "SAM", "Output mappings in SAM format")(
@@ -5999,7 +6007,9 @@ void ChromapDriver::ParseArgsAndRun(int argc, char *argv[]) {
      "drop-repetitive-reads",
      "Drop reads with too many best mappings [500000]", cxxopts::value<int>(),
      "INT")("allocate-multi-mappings", "Allocate multi-mappings")(
      "PAF", "Output mappings in PAF format (only for test)");
      "PAF", "Output mappings in PAF format (only for test)")
			("skip-barcode-check", "Do not check whether too few barcodes are in the whitelist")
				;

  auto result = options.parse(argc, argv);
  if (result.count("h")) {
@@ -6277,6 +6287,10 @@ void ChromapDriver::ParseArgsAndRun(int argc, char *argv[]) {
          result["pairs-natural-chr-order"].as<std::string>();
    }

    if (result.count("skip-barcode-check")) {
      mapping_parameters.skip_barcode_check = true;
    }

    // std::cerr << "Parameters: error threshold: " << error_threshold << ",
    // match score: " << match_score << ", mismatch_penalty: " <<
    // mismatch_penalty << ", gap open penalties for deletions and insertions: "
+6 −2
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@
#include "output_tools.h"
#include "sequence_batch.h"

#define CHROMAP_VERSION "0.1.3-r256"
#define CHROMAP_VERSION "0.1.3-r257"

namespace chromap {
struct uint128_t {
@@ -117,6 +117,7 @@ struct MappingParameters {
  std::string matrix_output_prefix;
  std::string custom_rid_order_path;
  std::string pairs_custom_rid_order_path;
  bool skip_barcode_check = false;
};

#define SortMappingWithoutBarcode(m)                                    \
@@ -202,7 +203,8 @@ class Chromap {
        matrix_output_prefix_(mapping_parameters.matrix_output_prefix),
        custom_rid_order_path_(mapping_parameters.custom_rid_order_path),
        pairs_custom_rid_order_path_(
            mapping_parameters.pairs_custom_rid_order_path) {
            mapping_parameters.pairs_custom_rid_order_path),
        skip_barcode_check_(mapping_parameters.skip_barcode_check){
    barcode_lookup_table_ = kh_init(k64_seq);
    barcode_whitelist_lookup_table_ = kh_init(k64_seq);
    barcode_histogram_ = kh_init(k64_seq);
@@ -681,6 +683,8 @@ class Chromap {
  uint32_t barcode_length_ = 0;
  khash_t(k64_seq) * barcode_histogram_;
  khash_t(k64_seq) * barcode_index_table_;
	bool skip_barcode_check_ = false ;

  // For peak calling
  std::vector<std::vector<uint16_t> > pileup_on_diff_ref_seqs_;
  std::vector<std::vector<Peak> > peaks_on_diff_ref_seqs_;