Commit 2942102a authored by Haowen Zhang
Browse files

Move mapping stats functions to MappingProcessor.

parent da05c6e7
Loading
Loading
Loading
Loading
+4 −26
Original line number Original line Diff line number Diff line
@@ -1095,8 +1095,6 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
        num_mappings_in_mem, num_reference_sequences, reference,
        num_mappings_in_mem, num_reference_sequences, reference,
        barcode_whitelist_lookup_table_, temp_mapping_file_handles);
        barcode_whitelist_lookup_table_, temp_mapping_file_handles);
  } else {
  } else {
    // OutputMappingStatistics(num_reference_sequences,
    // mappings_on_diff_ref_seqs, mappings_on_diff_ref_seqs);
    if (mapping_parameters_.Tn5_shift) {
    if (mapping_parameters_.Tn5_shift) {
      mapping_processor.ApplyTn5ShiftOnMappings(num_reference_sequences,
      mapping_processor.ApplyTn5ShiftOnMappings(num_reference_sequences,
                                                mappings_on_diff_ref_seqs);
                                                mappings_on_diff_ref_seqs);
@@ -1106,7 +1104,7 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
      mapping_processor.RemovePCRDuplicate(num_reference_sequences,
      mapping_processor.RemovePCRDuplicate(num_reference_sequences,
                                           mappings_on_diff_ref_seqs);
                                           mappings_on_diff_ref_seqs);
      std::cerr << "After removing PCR duplications, ";
      std::cerr << "After removing PCR duplications, ";
      OutputMappingStatistics(num_reference_sequences,
      mapping_processor.OutputMappingStatistics(num_reference_sequences,
                              mappings_on_diff_ref_seqs);
                              mappings_on_diff_ref_seqs);
    } else {
    } else {
      mapping_processor.SortOutputMappings(num_reference_sequences,
      mapping_processor.SortOutputMappings(num_reference_sequences,
@@ -1121,7 +1119,7 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
          mapping_parameters_.multi_mapping_allocation_distance,
          mapping_parameters_.multi_mapping_allocation_distance,
          mappings_on_diff_ref_seqs);
          mappings_on_diff_ref_seqs);
      std::cerr << "After allocating multi-mappings, ";
      std::cerr << "After allocating multi-mappings, ";
      OutputMappingStatistics(num_reference_sequences,
      mapping_processor.OutputMappingStatistics(num_reference_sequences,
                              mappings_on_diff_ref_seqs);
                              mappings_on_diff_ref_seqs);
      mapping_processor.SortOutputMappings(num_reference_sequences,
      mapping_processor.SortOutputMappings(num_reference_sequences,
                                           mappings_on_diff_ref_seqs);
                                           mappings_on_diff_ref_seqs);
@@ -1496,7 +1494,7 @@ void Chromap<MappingRecord>::MapSingleEndReads() {
      mapping_processor.RemovePCRDuplicate(num_reference_sequences,
      mapping_processor.RemovePCRDuplicate(num_reference_sequences,
                                           mappings_on_diff_ref_seqs);
                                           mappings_on_diff_ref_seqs);
      std::cerr << "After removing PCR duplications, ";
      std::cerr << "After removing PCR duplications, ";
      OutputMappingStatistics(num_reference_sequences,
      mapping_processor.OutputMappingStatistics(num_reference_sequences,
                              mappings_on_diff_ref_seqs);
                              mappings_on_diff_ref_seqs);
    } else {
    } else {
      mapping_processor.SortOutputMappings(num_reference_sequences,
      mapping_processor.SortOutputMappings(num_reference_sequences,
@@ -1511,7 +1509,7 @@ void Chromap<MappingRecord>::MapSingleEndReads() {
          mapping_parameters_.multi_mapping_allocation_distance,
          mapping_parameters_.multi_mapping_allocation_distance,
          mappings_on_diff_ref_seqs);
          mappings_on_diff_ref_seqs);
      std::cerr << "After allocating multi-mappings, ";
      std::cerr << "After allocating multi-mappings, ";
      OutputMappingStatistics(num_reference_sequences,
      mapping_processor.OutputMappingStatistics(num_reference_sequences,
                              mappings_on_diff_ref_seqs);
                              mappings_on_diff_ref_seqs);
      mapping_processor.SortOutputMappings(num_reference_sequences,
      mapping_processor.SortOutputMappings(num_reference_sequences,
                                           mappings_on_diff_ref_seqs);
                                           mappings_on_diff_ref_seqs);
@@ -1604,26 +1602,6 @@ void Chromap<MappingRecord>::OutputMappingStatistics() {
            << num_mappings_ - num_uniquely_mapped_reads_ << ".\n";
            << num_mappings_ - num_uniquely_mapped_reads_ << ".\n";
}
}


// Prints a one-line summary of the given mappings to std::cerr: the number
// of records whose is_unique_ field equals 1 ("uni-mappings"), the number of
// all remaining records ("multi-mappings"), and their sum.
//
// num_reference_sequences is accepted but not read by this function; every
// inner vector of mappings_on_diff_ref_seqs is scanned regardless.
template <typename MappingRecord>
void Chromap<MappingRecord>::OutputMappingStatistics(
    uint32_t num_reference_sequences,
    const std::vector<std::vector<MappingRecord>> &mappings_on_diff_ref_seqs) {
  uint64_t total_count = 0;
  uint64_t unique_count = 0;
  for (const auto &ref_seq_mappings : mappings_on_diff_ref_seqs) {
    total_count += ref_seq_mappings.size();
    for (const auto &record : ref_seq_mappings) {
      if (record.is_unique_ == 1) {
        ++unique_count;
      }
    }
  }
  // Anything that is not flagged unique is reported as a multi-mapping.
  const uint64_t multi_count = total_count - unique_count;
  std::cerr << "# uni-mappings: " << unique_count
            << ", # multi-mappings: " << multi_count
            << ", total: " << total_count << ".\n";
}

template <typename MappingRecord>
template <typename MappingRecord>
void Chromap<MappingRecord>::LoadBarcodeWhitelist() {
void Chromap<MappingRecord>::LoadBarcodeWhitelist() {
  double real_start_time = GetRealTime();
  double real_start_time = GetRealTime();
+1 −4
Original line number Original line Diff line number Diff line
@@ -105,15 +105,12 @@ class Chromap {


  void OutputMappingStatistics();
  void OutputMappingStatistics();


  // Writes to std::cerr the number of uni-mappings (records with
  // is_unique_ == 1), the number of multi-mappings (all other records) and
  // their total, counted over every inner vector of
  // mappings_on_diff_ref_seqs. num_reference_sequences is not read by the
  // implementation.
  void OutputMappingStatistics(uint32_t num_reference_sequences,
                               const std::vector<std::vector<MappingRecord> >
                                   &mappings_on_diff_ref_seqs);

  void GenerateCustomizedRidRank(const std::string &rid_order_path,
  void GenerateCustomizedRidRank(const std::string &rid_order_path,
                                 uint32_t num_reference_sequences,
                                 uint32_t num_reference_sequences,
                                 const SequenceBatch &reference,
                                 const SequenceBatch &reference,
                                 std::vector<int> &rid_rank);
                                 std::vector<int> &rid_rank);


  // TODO: generate reranked candidates directly.
  void RerankCandidatesRid(std::vector<Candidate> &candidates);
  void RerankCandidatesRid(std::vector<Candidate> &candidates);


  void ParseReadFormat(const std::string &read_format);
  void ParseReadFormat(const std::string &read_format);
+24 −0
Original line number Original line Diff line number Diff line
@@ -67,6 +67,10 @@ class MappingProcessor {
          &mappings_on_diff_ref_seqs_for_diff_threads_for_saving,
          &mappings_on_diff_ref_seqs_for_diff_threads_for_saving,
      std::vector<std::vector<MappingRecord>> &mappings_on_diff_ref_seqs);
      std::vector<std::vector<MappingRecord>> &mappings_on_diff_ref_seqs);


  // Writes to std::cerr the number of uni-mappings (records with
  // is_unique_ == 1), the number of multi-mappings (all other records) and
  // their total, counted over every inner vector of
  // mappings_on_diff_ref_seqs. num_reference_sequences is not read by the
  // implementation.
  void OutputMappingStatistics(
      uint32_t num_reference_sequences,
      const std::vector<std::vector<MappingRecord>> &mappings_on_diff_ref_seqs);

 private:
 private:
  void BuildAugmentedTree(
  void BuildAugmentedTree(
      uint32_t ref_id,
      uint32_t ref_id,
@@ -424,6 +428,26 @@ MappingProcessor<MappingRecord>::MoveMappingsInBuffersToMappingContainer(
  return num_moved_mappings;
  return num_moved_mappings;
}
}


// Tallies the supplied mappings and reports the result on std::cerr as
// "# uni-mappings: U, # multi-mappings: M, total: U+M.".
//
// A record counts as a uni-mapping when its is_unique_ field equals 1 and as
// a multi-mapping otherwise. num_reference_sequences is accepted but not read
// here; the outer vector is walked in full.
template <typename MappingRecord>
void MappingProcessor<MappingRecord>::OutputMappingStatistics(
    uint32_t num_reference_sequences,
    const std::vector<std::vector<MappingRecord>> &mappings_on_diff_ref_seqs) {
  // tally[1] holds unique mappings, tally[0] holds everything else.
  uint64_t tally[2] = {0, 0};
  for (const auto &ref_seq_mappings : mappings_on_diff_ref_seqs) {
    for (const auto &record : ref_seq_mappings) {
      const bool is_unique = (record.is_unique_ == 1);
      ++tally[is_unique ? 1 : 0];
    }
  }
  const uint64_t unique_count = tally[1];
  const uint64_t multi_count = tally[0];
  std::cerr << "# uni-mappings: " << unique_count
            << ", # multi-mappings: " << multi_count
            << ", total: " << unique_count + multi_count << ".\n";
}

}  // namespace chromap
}  // namespace chromap


#endif  // MAPPING_PROCESSOR_H_
#endif  // MAPPING_PROCESSOR_H_