Commit 5172d3bd authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Add peak calling parameters

parent b49ef6c6
Loading
Loading
Loading
Loading
+79 −31
Original line number Diff line number Diff line
@@ -225,6 +225,7 @@ uint32_t Chromap<PairedEndMappingWithBarcode>::CallPeaks(uint16_t coverage_thres
        }
        ++peak_length; // extend the peak
      } else if (peak_length > 0) { // save the previous peak
        // TODO(Haowen): improve peak calling
        peaks_on_diff_ref_seqs_[ri].emplace_back(Peak{peak_start_position, peak_length, peak_count}); 
        tree_extras_on_diff_ref_seqs_[ri].emplace_back(0);
        output_tools_->OutputPeaks(peak_start_position, peak_length, ri, reference);
@@ -245,9 +246,19 @@ void Chromap<MappingRecord>::OutputFeatureMatrix(uint32_t num_sequences, const S

template <>
void Chromap<PairedEndMappingWithBarcode>::OutputFeatureMatrix(uint32_t num_sequences, const SequenceBatch &reference) {
  //uint32_t bin_size = 5000;
  //output_tools_->OutputPeaks(bin_size, num_sequences, reference);
  uint32_t num_peaks = CallPeaks(3, num_sequences, reference);
  uint32_t num_peaks = 0;
  if (cell_by_bin_) {
    output_tools_->OutputPeaks(bin_size_, num_sequences, reference);
    for (uint32_t i = 0; i < num_sequences; ++i) {
      uint32_t ref_seq_length = reference.GetSequenceLengthAt(i);
      num_peaks += ref_seq_length / bin_size_;
      if (ref_seq_length % bin_size_ != 0) {
        ++num_peaks;
      }
    }
  } else {
    num_peaks = CallPeaks(depth_cutoff_to_call_peak_, num_sequences, reference);
  }
  std::vector<std::vector<PairedEndMappingWithBarcode> > &mappings = allocate_multi_mappings_ ? allocated_mappings_on_diff_ref_seqs_ : (remove_pcr_duplicates_ ? deduped_mappings_on_diff_ref_seqs_ : mappings_on_diff_ref_seqs_);
  double real_start_time = GetRealTime();
  // First pass to index barcodes
@@ -276,12 +287,16 @@ void Chromap<PairedEndMappingWithBarcode>::OutputFeatureMatrix(uint32_t num_sequ
      uint32_t barcode_key = mappings[rid][mi].cell_barcode;
      khiter_t barcode_index_table_iterator = kh_get(k32, barcode_index_table_, barcode_key);
      uint64_t barcode_index = kh_value(barcode_index_table_, barcode_index_table_iterator);
      //uint32_t peak_index = Position2PeakIndex(bin_size, rid, mappings[rid][mi].fragment_start_position, num_sequences, reference);
      overlapped_peak_indices.clear();
      if (cell_by_bin_) {
        GetNumOverlappedBins(rid, mappings[rid][mi].fragment_start_position, mappings[rid][mi].fragment_length, num_sequences, reference, overlapped_peak_indices);
      } else {
        GetNumOverlappedPeaks(rid, mappings[rid][mi], overlapped_peak_indices);
      for (size_t pi = 0; pi < overlapped_peak_indices.size(); ++pi) {
        //uint64_t entry_index = (barcode_index << 32) | peak_index;
        uint64_t entry_index = (barcode_index << 32) | overlapped_peak_indices[pi];
      }
      size_t num_overlapped_peaks = overlapped_peak_indices.size();
      for (size_t pi = 0; pi < num_overlapped_peaks; ++pi) {
        uint32_t peak_index = overlapped_peak_indices[pi];
        uint64_t entry_index = (barcode_index << 32) | peak_index;
        khiter_t matrix_iterator = kh_get(kmatrix, matrix, entry_index);
        if (matrix_iterator == kh_end(matrix)) {
          int khash_return_code;
@@ -296,33 +311,40 @@ void Chromap<PairedEndMappingWithBarcode>::OutputFeatureMatrix(uint32_t num_sequ
  }
  std::cerr << "Generate feature matrix in " << Chromap<>::GetRealTime() - real_start_time << "s.\n";
  // Output matrix
  //uint32_t num_peaks = 0;
  //for (uint32_t i = 0; i < num_sequences; ++i) {
  //  uint32_t ref_seq_length = reference.GetSequenceLengthAt(i);
  //  num_peaks += ref_seq_length / bin_size;
  //  if (ref_seq_length % bin_size != 0) {
  //    ++num_peaks;
  //  }
  //}
  real_start_time = GetRealTime();
  output_tools_->WriteMatrixOutputHead(num_peaks, kh_size(barcode_index_table_), kh_size(matrix));
  uint64_t key;
  uint32_t value;
  kh_foreach(matrix, key, value, output_tools_->AppendMatrixOutput((uint32_t)key, (uint32_t)(key >> 32), value));
  std::vector<std::pair<uint64_t, uint32_t> > feature_matrix;
  feature_matrix.reserve(kh_size(matrix));
  //kh_foreach(matrix, key, value, output_tools_->AppendMatrixOutput((uint32_t)key, (uint32_t)(key >> 32), value));
  kh_foreach(matrix, key, value, feature_matrix.emplace_back(key, value));
  kh_destroy(kmatrix, matrix);
  std::sort(feature_matrix.begin(), feature_matrix.end());
  for (size_t i = 0; i < feature_matrix.size(); ++i) {
    output_tools_->AppendMatrixOutput((uint32_t)feature_matrix[i].first, (uint32_t)(feature_matrix[i].first >> 32), feature_matrix[i].second);
  }
  std::cerr << "Output feature matrix in " << Chromap<>::GetRealTime() - real_start_time << "s.\n";
}

template <typename MappingRecord>
uint32_t Chromap<MappingRecord>::Position2PeakIndex(uint32_t bin_size, uint32_t rid, uint32_t position, uint32_t num_sequences, const SequenceBatch &reference) {
  //TODO(Haowen): consider fragment length
  uint32_t peak_index = 0;
void Chromap<MappingRecord>::GetNumOverlappedBins(uint32_t rid, uint32_t start_position, uint16_t mapping_length, uint32_t num_sequences, const SequenceBatch &reference, std::vector<uint32_t> &overlapped_peak_indices) {
  uint32_t bin_index = 0;
  for (uint32_t i = 0; i < rid; ++i) {
    uint32_t ref_seq_length = reference.GetSequenceLengthAt(i);
    peak_index += ref_seq_length / bin_size;
    bin_index += ref_seq_length / bin_size_;
    if (ref_seq_length % bin_size_ != 0) {
      ++bin_index;
    }
  }
  bin_index += start_position / bin_size_;
  overlapped_peak_indices.emplace_back(bin_index);
  uint32_t max_num_overlapped_bins = mapping_length / bin_size_ + 2;
  for (uint32_t i = 0; i < max_num_overlapped_bins; ++i) {
    if (start_position + mapping_length - 1 >= (bin_index + 1 + i) * bin_size_) {
      overlapped_peak_indices.emplace_back(bin_index + 1 + i);
    }
  }
  peak_index += position / bin_size;
  return peak_index;
}

template <typename MappingRecord>
@@ -2260,6 +2282,12 @@ void ChromapDriver::ParseArgsAndRun(int argc, char *argv[]) {
    //("allocate-multi-mappings", "Allocate multi-mappings")
    ("Tn5-shift", "Perform Tn5 shift")
    ("t,num-threads", "# threads for mapping [1]", cxxopts::value<int>(), "INT");
  options.add_options("Peak")
    ("cell-by-bin", "Generate cell-by-bin matrix")
    ("bin-size", "Bin size to generate cell-by-bin matrix [5000]", cxxopts::value<int>(), "INT")
    ("depth-cutoff", "Depth cutoff for peak calling [3]", cxxopts::value<int>(), "INT")
    ("peak-min-length", "Min length of peaks to report [30]", cxxopts::value<int>(), "INT")
    ("peak-merge-max-length", "Peaks within this length will be merged [30]", cxxopts::value<int>(), "INT");
  options.add_options("Input")
    ("r,ref", "Reference file", cxxopts::value<std::string>(), "FILE")
    ("x,index", "Index file", cxxopts::value<std::string>(), "FILE")
@@ -2387,6 +2415,26 @@ void ChromapDriver::ParseArgsAndRun(int argc, char *argv[]) {
  if (result.count("PAF")) {
    output_mapping_in_PAF = true;
  }
  bool cell_by_bin = false;
  if (result.count("cell-by-bin")) {
    cell_by_bin = true;
  }
  int bin_size = 5000;
  if (result.count("bin-size")) {
    bin_size = result["bin-size"].as<int>();
  }
  uint16_t depth_cutoff_to_call_peak = 3;
  if (result.count("depth-cutoff")) {
    depth_cutoff_to_call_peak = result["depth-cutoff"].as<uint16_t>();
  }
  int peak_min_length = 30;
  if (result.count("peak-min-length")) {
    peak_min_length = result["peak-min-length"].as<int>();
  }
  int peak_merge_max_length = 30;
  if (result.count("peak-merge-max-length")) {
    peak_merge_max_length = result["peak-merge-max-length"].as<int>();
  }

  if (result.count("i")) {
    std::string reference_file_path;
@@ -2520,35 +2568,35 @@ void ChromapDriver::ParseArgsAndRun(int argc, char *argv[]) {
    }
    if (result.count("2") == 0) {
      if (output_mapping_in_PAF) {
        chromap::Chromap<chromap::PAFMapping> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
        chromap::Chromap<chromap::PAFMapping> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, cell_by_bin, bin_size, depth_cutoff_to_call_peak, peak_min_length, peak_merge_max_length, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
        chromap_for_mapping.MapSingleEndReads();
      } else {
        if (result.count("b") != 0) {
          chromap::Chromap<chromap::MappingWithBarcode> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
          chromap::Chromap<chromap::MappingWithBarcode> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, cell_by_bin, bin_size, depth_cutoff_to_call_peak, peak_min_length, peak_merge_max_length, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
          chromap_for_mapping.MapSingleEndReads();
        } else {
          chromap::Chromap<chromap::MappingWithoutBarcode> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
          chromap::Chromap<chromap::MappingWithoutBarcode> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, cell_by_bin, bin_size, depth_cutoff_to_call_peak, peak_min_length, peak_merge_max_length, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
          chromap_for_mapping.MapSingleEndReads();
        }
      }
    } else {
      if (output_mapping_in_PAF) {
        chromap::Chromap<chromap::PairedPAFMapping> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
        chromap::Chromap<chromap::PairedPAFMapping> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, cell_by_bin, bin_size, depth_cutoff_to_call_peak, peak_min_length, peak_merge_max_length, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
        chromap_for_mapping.MapPairedEndReads();
      } else {
        if (result.count("b") != 0) {
          chromap::Chromap<chromap::PairedEndMappingWithBarcode> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
          chromap::Chromap<chromap::PairedEndMappingWithBarcode> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, cell_by_bin, bin_size, depth_cutoff_to_call_peak, peak_min_length, peak_merge_max_length, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
          chromap_for_mapping.MapPairedEndReads();
        } else {
          chromap::Chromap<chromap::PairedEndMappingWithoutBarcode> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
          chromap::Chromap<chromap::PairedEndMappingWithoutBarcode> chromap_for_mapping(error_threshold, match_score, mismatch_penalty, gap_open_penalties, gap_extension_penalties, min_num_seeds_required_for_mapping, max_seed_frequencies, max_num_best_mappings, max_insert_size, mapq_threshold, num_threads, min_read_length, multi_mapping_allocation_distance, multi_mapping_allocation_seed, drop_repetitive_reads, trim_adapters, remove_pcr_duplicates, is_bulk_data, allocate_multi_mappings, only_output_unique_mappings, Tn5_shift, output_mapping_in_BED, output_mapping_in_TagAlign, output_mapping_in_PAF, cell_by_bin, bin_size, depth_cutoff_to_call_peak, peak_min_length, peak_merge_max_length, reference_file_path, index_file_path, read_file1_path, read_file2_path, barcode_file_path, barcode_whitelist_file_path, output_file_path, matrix_output_prefix);
          chromap_for_mapping.MapPairedEndReads();
        }
      }
    }
  } else if (result.count("h")) {
    std::cerr << options.help({"", "Indexing", "Mapping", "Input", "Output"});
    std::cerr << options.help({"", "Indexing", "Mapping", "Peak", "Input", "Output"});
  } else {
    std::cerr << options.help({"", "Indexing", "Mapping", "Input", "Output"});
    std::cerr << options.help({"", "Indexing", "Mapping", "Peak", "Input", "Output"});
  }
}
} // namespace chromap
+7 −2
Original line number Diff line number Diff line
@@ -75,7 +75,7 @@ class Chromap {
  }

  // For mapping
  Chromap(int error_threshold, int match_score, int mismatch_penalty, const std::vector<int> &gap_open_penalties, const std::vector<int> &gap_extension_penalties, int min_num_seeds_required_for_mapping, const std::vector<int> &max_seed_frequencies, int max_num_best_mappings, int max_insert_size, uint8_t mapq_threshold, int num_threads, int min_read_length, int multi_mapping_allocation_distance, int multi_mapping_allocation_seed, int drop_repetitive_reads, bool trim_adapters, bool remove_pcr_duplicates, bool is_bulk_data, bool allocate_multi_mappings, bool only_output_unique_mappings, bool Tn5_shift, bool output_mapping_in_BED, bool output_mapping_in_TagAlign, bool output_mapping_in_PAF, const std::string &reference_file_path, const std::string &index_file_path, const std::string &read_file1_path, const std::string &read_file2_path, const std::string &barcode_file_path, const std::string &barcode_whitelist_file_path, const std::string &mapping_output_file_path, const std::string &matrix_output_prefix) : error_threshold_(error_threshold), match_score_(match_score), mismatch_penalty_(mismatch_penalty), gap_open_penalties_(gap_open_penalties), gap_extension_penalties_(gap_extension_penalties), min_num_seeds_required_for_mapping_(min_num_seeds_required_for_mapping), max_seed_frequencies_(max_seed_frequencies), max_num_best_mappings_(max_num_best_mappings), max_insert_size_(max_insert_size), mapq_threshold_(mapq_threshold), num_threads_(num_threads), min_read_length_(min_read_length), multi_mapping_allocation_distance_(multi_mapping_allocation_distance), multi_mapping_allocation_seed_(multi_mapping_allocation_seed), drop_repetitive_reads_(drop_repetitive_reads), trim_adapters_(trim_adapters), remove_pcr_duplicates_(remove_pcr_duplicates), is_bulk_data_(is_bulk_data), allocate_multi_mappings_(allocate_multi_mappings), only_output_unique_mappings_(only_output_unique_mappings), Tn5_shift_(Tn5_shift), output_mapping_in_BED_(output_mapping_in_BED), output_mapping_in_TagAlign_(output_mapping_in_TagAlign), output_mapping_in_PAF_(output_mapping_in_PAF), reference_file_path_(reference_file_path), index_file_path_(index_file_path), read_file1_path_(read_file1_path), read_file2_path_(read_file2_path), barcode_file_path_(barcode_file_path), barcode_whitelist_file_path_(barcode_whitelist_file_path), mapping_output_file_path_(mapping_output_file_path), matrix_output_prefix_(matrix_output_prefix) {
  Chromap(int error_threshold, int match_score, int mismatch_penalty, const std::vector<int> &gap_open_penalties, const std::vector<int> &gap_extension_penalties, int min_num_seeds_required_for_mapping, const std::vector<int> &max_seed_frequencies, int max_num_best_mappings, int max_insert_size, uint8_t mapq_threshold, int num_threads, int min_read_length, int multi_mapping_allocation_distance, int multi_mapping_allocation_seed, int drop_repetitive_reads, bool trim_adapters, bool remove_pcr_duplicates, bool is_bulk_data, bool allocate_multi_mappings, bool only_output_unique_mappings, bool Tn5_shift, bool output_mapping_in_BED, bool output_mapping_in_TagAlign, bool output_mapping_in_PAF, bool cell_by_bin, int bin_size, uint16_t depth_cutoff_to_call_peak, int peak_min_length, int peak_merge_max_length, const std::string &reference_file_path, const std::string &index_file_path, const std::string &read_file1_path, const std::string &read_file2_path, const std::string &barcode_file_path, const std::string &barcode_whitelist_file_path, const std::string &mapping_output_file_path, const std::string &matrix_output_prefix) : error_threshold_(error_threshold), match_score_(match_score), mismatch_penalty_(mismatch_penalty), gap_open_penalties_(gap_open_penalties), gap_extension_penalties_(gap_extension_penalties), min_num_seeds_required_for_mapping_(min_num_seeds_required_for_mapping), max_seed_frequencies_(max_seed_frequencies), max_num_best_mappings_(max_num_best_mappings), max_insert_size_(max_insert_size), mapq_threshold_(mapq_threshold), num_threads_(num_threads), min_read_length_(min_read_length), multi_mapping_allocation_distance_(multi_mapping_allocation_distance), multi_mapping_allocation_seed_(multi_mapping_allocation_seed), drop_repetitive_reads_(drop_repetitive_reads), trim_adapters_(trim_adapters), remove_pcr_duplicates_(remove_pcr_duplicates), is_bulk_data_(is_bulk_data), allocate_multi_mappings_(allocate_multi_mappings), only_output_unique_mappings_(only_output_unique_mappings), Tn5_shift_(Tn5_shift), output_mapping_in_BED_(output_mapping_in_BED), output_mapping_in_TagAlign_(output_mapping_in_TagAlign), output_mapping_in_PAF_(output_mapping_in_PAF), cell_by_bin_(cell_by_bin), bin_size_(bin_size), depth_cutoff_to_call_peak_(depth_cutoff_to_call_peak), peak_min_length_(peak_min_length), peak_merge_max_length_(peak_merge_max_length), reference_file_path_(reference_file_path), index_file_path_(index_file_path), read_file1_path_(read_file1_path), read_file2_path_(read_file2_path), barcode_file_path_(barcode_file_path), barcode_whitelist_file_path_(barcode_whitelist_file_path), mapping_output_file_path_(mapping_output_file_path), matrix_output_prefix_(matrix_output_prefix) {
    barcode_lookup_table_ = kh_init(k32);
    barcode_whitelist_lookup_table_ = kh_init(k32_set);
    barcode_histogram_ = kh_init(k32);
@@ -158,7 +158,7 @@ class Chromap {
  void CorrectBarcodeAt(uint32_t barcode_index, SequenceBatch *barcode_batch, uint64_t *num_barcode_in_whitelist, uint64_t *num_corrected_barcode);
  uint32_t CallPeaks(uint16_t coverage_threshold, uint32_t num_reference_sequences, const SequenceBatch &reference);
  void OutputFeatureMatrix(uint32_t num_sequences, const SequenceBatch &reference);
  uint32_t Position2PeakIndex(uint32_t bin_size, uint32_t rid, uint32_t position, uint32_t num_sequences, const SequenceBatch &reference);
  void GetNumOverlappedBins(uint32_t rid, uint32_t start_position, uint16_t mapping_length, uint32_t num_sequences, const SequenceBatch &reference, std::vector<uint32_t> &overlapped_peak_indices);
  uint32_t GetNumOverlappedPeaks(uint32_t ref_id, const MappingRecord &mapping, std::vector<uint32_t> &overlapped_peak_indices);
  void BuildAugmentedTreeForPeaks(uint32_t ref_id);
  void OutputMappingsInVector(uint8_t mapq_threshold, uint32_t num_reference_sequences, const SequenceBatch &reference, const std::vector<std::vector<MappingRecord> > &mappings);
@@ -209,6 +209,11 @@ class Chromap {
  bool output_mapping_in_TagAlign_;
  bool output_mapping_in_PAF_;
  uint32_t read_batch_size_ = 1000000; // default batch size, # reads for single-end reads, # read pairs for paired-end reads
  bool cell_by_bin_;
  int bin_size_;
  uint16_t depth_cutoff_to_call_peak_;
  int peak_min_length_;
  int peak_merge_max_length_;
  std::string reference_file_path_;
  std::string index_file_path_;
  std::string read_file1_path_;
+2 −2
Original line number Diff line number Diff line
@@ -175,8 +175,8 @@ class OutputTools {
  void WriteMatrixOutputHead(uint64_t num_peaks, uint64_t num_barcodes, uint64_t num_lines) {
    fprintf(matrix_output_file_, "%lu\t%lu\t%lu\n", num_peaks, num_barcodes, num_lines);
  }
  void AppendMatrixOutput(uint64_t peak_index, uint64_t barcode_index, uint64_t num_mappings) {
    fprintf(matrix_output_file_, "%lu\t%lu\t%lu\n", peak_index, barcode_index, num_mappings);
  void AppendMatrixOutput(uint32_t peak_index, uint32_t barcode_index, uint32_t num_mappings) {
    fprintf(matrix_output_file_, "%u\t%u\t%u\n", peak_index, barcode_index, num_mappings);
  }
  inline void FinalizeMatrixOutput() {
    fclose(matrix_output_file_);