Commit 1da16b17 authored by Haowen Zhang's avatar Haowen Zhang
Browse files

Use a local variable to save mappings.

parent 1519d2a3
Loading
Loading
Loading
Loading
+64 −47
Original line number Diff line number Diff line
@@ -536,13 +536,6 @@ void Chromap<MappingRecord>::ProcessAndOutputMappingsInLowMemory(
    const SequenceBatch &reference,
    const MappingProcessor<MappingRecord> &mapping_processor,
    MappingWriter<MappingRecord> &mapping_writer) {
  // First, process the mappings in the memory and save them on disk.
  if (num_mappings_in_mem > 0) {
    OutputTempMappings(num_reference_sequences, mapping_processor,
                       mapping_writer);
    num_mappings_in_mem = 0;
  }

  if (temp_mapping_file_handles_.size() == 0) {
    return;
  }
@@ -845,10 +838,11 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
  struct _mm_history *mm_history1 = new struct _mm_history[read_batch_size_];
  struct _mm_history *mm_history2 = new struct _mm_history[read_batch_size_];

  std::vector<std::vector<MappingRecord>> mappings_on_diff_ref_seqs;
  // Initialize mapping container
  mappings_on_diff_ref_seqs_.reserve(num_reference_sequences);
  mappings_on_diff_ref_seqs.reserve(num_reference_sequences);
  for (uint32_t i = 0; i < num_reference_sequences; ++i) {
    mappings_on_diff_ref_seqs_.emplace_back(std::vector<MappingRecord>());
    mappings_on_diff_ref_seqs.emplace_back(std::vector<MappingRecord>());
  }

  // Preprocess barcodes for single cell data
@@ -956,7 +950,7 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
      }
    }

#pragma omp parallel default(none) shared(output_mappings_not_in_whitelist_, barcode_whitelist_file_path_, trim_adapters_, window_size_, custom_rid_order_path_, split_alignment_, error_threshold_, max_seed_frequencies_, max_num_best_mappings_, max_insert_size_, num_threads_, num_reads_, low_memory_mode_, num_reference_sequences, reference, index, read_batch1, read_batch2, barcode_batch, read_batch1_for_loading, read_batch2_for_loading, barcode_batch_for_loading, candidate_processor, mapping_processor, mapping_generator, mapping_writer, std::cerr, num_loaded_pairs_for_loading, num_loaded_pairs, mappings_on_diff_ref_seqs_for_diff_threads, mappings_on_diff_ref_seqs_for_diff_threads_for_saving, mappings_on_diff_ref_seqs_, mapping_output_file_path_, num_mappings_in_mem, max_num_mappings_in_mem, temp_mapping_file_handles_, mm_to_candidates_cache, mm_history1, mm_history2) num_threads(num_threads_) reduction(+:num_candidates_, num_mappings_, num_mapped_reads_, num_uniquely_mapped_reads_, num_barcode_in_whitelist_, num_corrected_barcode_)
#pragma omp parallel default(none) shared(output_mappings_not_in_whitelist_, barcode_whitelist_file_path_, trim_adapters_, window_size_, custom_rid_order_path_, split_alignment_, error_threshold_, max_seed_frequencies_, max_num_best_mappings_, max_insert_size_, num_threads_, num_reads_, low_memory_mode_, num_reference_sequences, reference, index, read_batch1, read_batch2, barcode_batch, read_batch1_for_loading, read_batch2_for_loading, barcode_batch_for_loading, candidate_processor, mapping_processor, mapping_generator, mapping_writer, std::cerr, num_loaded_pairs_for_loading, num_loaded_pairs, mappings_on_diff_ref_seqs_for_diff_threads, mappings_on_diff_ref_seqs_for_diff_threads_for_saving, mappings_on_diff_ref_seqs, mapping_output_file_path_, num_mappings_in_mem, max_num_mappings_in_mem, temp_mapping_file_handles_, mm_to_candidates_cache, mm_history1, mm_history2) num_threads(num_threads_) reduction(+:num_candidates_, num_mappings_, num_mapped_reads_, num_uniquely_mapped_reads_, num_barcode_in_whitelist_, num_corrected_barcode_)
    {
      thread_num_candidates = 0;
      thread_num_mappings = 0;
@@ -1248,10 +1242,12 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
            // Handle output
            num_mappings_in_mem += MoveMappingsInBuffersToMappingContainer(
                num_reference_sequences,
                mappings_on_diff_ref_seqs_for_diff_threads_for_saving);
                mappings_on_diff_ref_seqs_for_diff_threads_for_saving,
                mappings_on_diff_ref_seqs);
            if (low_memory_mode_ &&
                num_mappings_in_mem > max_num_mappings_in_mem) {
              OutputTempMappings(num_reference_sequences, mapping_processor,
              OutputTempMappings(num_reference_sequences,
                                 mappings_on_diff_ref_seqs, mapping_processor,
                                 mapping_writer);
              num_mappings_in_mem = 0;
            }
@@ -1289,27 +1285,35 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
  index.Destroy();

  if (low_memory_mode_) {
    // First, process the remaining mappings in the memory and save them on
    // disk.
    if (num_mappings_in_mem > 0) {
      OutputTempMappings(num_reference_sequences, mappings_on_diff_ref_seqs,
                         mapping_processor, mapping_writer);
      num_mappings_in_mem = 0;
    }

    ProcessAndOutputMappingsInLowMemory(num_mappings_in_mem,
                                        num_reference_sequences, reference,
                                        mapping_processor, mapping_writer);
  } else {
    // OutputMappingStatistics(num_reference_sequences,
    // mappings_on_diff_ref_seqs_, mappings_on_diff_ref_seqs_);
    // mappings_on_diff_ref_seqs, mappings_on_diff_ref_seqs);
    if (Tn5_shift_) {
      mapping_processor.ApplyTn5ShiftOnMappings(num_reference_sequences,
                                                mappings_on_diff_ref_seqs_);
                                                mappings_on_diff_ref_seqs);
    }

    if (remove_pcr_duplicates_) {
      mapping_processor.RemovePCRDuplicate(num_reference_sequences,
                                           mappings_on_diff_ref_seqs_);
                                           mappings_on_diff_ref_seqs);
      std::cerr << "After removing PCR duplications, ";
      OutputMappingStatistics(num_reference_sequences,
                              mappings_on_diff_ref_seqs_,
                              mappings_on_diff_ref_seqs_);
                              mappings_on_diff_ref_seqs,
                              mappings_on_diff_ref_seqs);
    } else {
      mapping_processor.SortOutputMappings(num_reference_sequences,
                                           mappings_on_diff_ref_seqs_);
                                           mappings_on_diff_ref_seqs);
    }

    if (allocate_multi_mappings_) {
@@ -1317,16 +1321,16 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
          num_mapped_reads_ - num_uniquely_mapped_reads_;
      mapping_processor.AllocateMultiMappings(
          num_reference_sequences, num_multi_mappings,
          multi_mapping_allocation_distance_, mappings_on_diff_ref_seqs_);
          multi_mapping_allocation_distance_, mappings_on_diff_ref_seqs);
      std::cerr << "After allocating multi-mappings, ";
      OutputMappingStatistics(num_reference_sequences,
                              mappings_on_diff_ref_seqs_,
                              mappings_on_diff_ref_seqs_);
                              mappings_on_diff_ref_seqs,
                              mappings_on_diff_ref_seqs);
      mapping_processor.SortOutputMappings(num_reference_sequences,
                                           mappings_on_diff_ref_seqs_);
                                           mappings_on_diff_ref_seqs);
    }
    OutputMappings(num_reference_sequences, reference,
                   mappings_on_diff_ref_seqs_, mapping_writer);
                   mappings_on_diff_ref_seqs, mapping_writer);

    // Temporarily disable feature matrix output. Do not delete the following
    // commented code.
@@ -1338,9 +1342,9 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
    //        depth_cutoff_to_call_peak_);
    //    std::vector<std::vector<PairedEndMappingWithBarcode>> &mappings =
    //        allocate_multi_mappings_
    //            ? allocated_mappings_on_diff_ref_seqs_
    //            : (remove_pcr_duplicates_ ? deduped_mappings_on_diff_ref_seqs_
    //                                      : mappings_on_diff_ref_seqs_);
    //            ? allocated_mappings_on_diff_ref_seqs
    //            : (remove_pcr_duplicates_ ? deduped_mappings_on_diff_ref_seqs
    //                                      : mappings_on_diff_ref_seqs);

    //    feature_barcode_matrix.OutputFeatureMatrix(num_reference_sequences,
    //                                               reference, mappings,
@@ -1359,6 +1363,7 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
template <typename MappingRecord>
void Chromap<MappingRecord>::OutputTempMappings(
    uint32_t num_reference_sequences,
    std::vector<std::vector<MappingRecord>> &mappings_on_diff_ref_seqs,
    const MappingProcessor<MappingRecord> &mapping_processor,
    MappingWriter<MappingRecord> &mapping_writer) {
  TempMappingFileHandle<MappingRecord> temp_mapping_file_handle;
@@ -1368,14 +1373,14 @@ void Chromap<MappingRecord>::OutputTempMappings(
  temp_mapping_file_handles_.emplace_back(temp_mapping_file_handle);

  mapping_processor.SortOutputMappings(num_reference_sequences,
                                       mappings_on_diff_ref_seqs_);
                                       mappings_on_diff_ref_seqs);

  mapping_writer.OutputTempMapping(temp_mapping_file_handle.file_path,
                                   num_reference_sequences,
                                   mappings_on_diff_ref_seqs_);
                                   mappings_on_diff_ref_seqs);

  for (uint32_t i = 0; i < num_reference_sequences; ++i) {
    mappings_on_diff_ref_seqs_[i].clear();
    mappings_on_diff_ref_seqs[i].clear();
  }
}

@@ -1445,9 +1450,10 @@ void Chromap<MappingRecord>::MapSingleEndReads() {
  barcode_batch_for_loading.SetSeqEffectiveRange(
      barcode_format_[0], barcode_format_[1], barcode_format_[2]);

  mappings_on_diff_ref_seqs_.reserve(num_reference_sequences);
  std::vector<std::vector<MappingRecord>> mappings_on_diff_ref_seqs;
  mappings_on_diff_ref_seqs.reserve(num_reference_sequences);
  for (uint32_t i = 0; i < num_reference_sequences; ++i) {
    mappings_on_diff_ref_seqs_.emplace_back(std::vector<MappingRecord>());
    mappings_on_diff_ref_seqs.emplace_back(std::vector<MappingRecord>());
  }

  // Preprocess barcodes for single cell data
@@ -1547,7 +1553,7 @@ void Chromap<MappingRecord>::MapSingleEndReads() {
            num_threads_ / num_reference_sequences);
      }
    }
#pragma omp parallel default(none) shared(barcode_whitelist_file_path_, output_mappings_not_in_whitelist_, window_size_, custom_rid_order_path_, error_threshold_, max_num_best_mappings_, max_seed_frequencies_, num_threads_, num_reads_, mm_history, reference, index, read_batch, barcode_batch, read_batch_for_loading, barcode_batch_for_loading, std::cerr, num_loaded_reads_for_loading, num_loaded_reads, num_reference_sequences, mappings_on_diff_ref_seqs_for_diff_threads, mappings_on_diff_ref_seqs_for_diff_threads_for_saving, mm_to_candidates_cache, mapping_writer, candidate_processor, mapping_processor, mapping_generator, low_memory_mode_, num_mappings_in_mem, max_num_mappings_in_mem) num_threads(num_threads_) reduction(+:num_candidates_, num_mappings_, num_mapped_reads_, num_uniquely_mapped_reads_, num_barcode_in_whitelist_, num_corrected_barcode_)
#pragma omp parallel default(none) shared(barcode_whitelist_file_path_, output_mappings_not_in_whitelist_, window_size_, custom_rid_order_path_, error_threshold_, max_num_best_mappings_, max_seed_frequencies_, num_threads_, num_reads_, mm_history, reference, index, read_batch, barcode_batch, read_batch_for_loading, barcode_batch_for_loading, std::cerr, num_loaded_reads_for_loading, num_loaded_reads, num_reference_sequences, mappings_on_diff_ref_seqs_for_diff_threads, mappings_on_diff_ref_seqs_for_diff_threads_for_saving, mappings_on_diff_ref_seqs, mm_to_candidates_cache, mapping_writer, candidate_processor, mapping_processor, mapping_generator, low_memory_mode_, num_mappings_in_mem, max_num_mappings_in_mem) num_threads(num_threads_) reduction(+:num_candidates_, num_mappings_, num_mapped_reads_, num_uniquely_mapped_reads_, num_barcode_in_whitelist_, num_corrected_barcode_)
    {
      thread_num_candidates = 0;
      thread_num_mappings = 0;
@@ -1682,10 +1688,12 @@ void Chromap<MappingRecord>::MapSingleEndReads() {
          {
            num_mappings_in_mem += MoveMappingsInBuffersToMappingContainer(
                num_reference_sequences,
                mappings_on_diff_ref_seqs_for_diff_threads_for_saving);
                mappings_on_diff_ref_seqs_for_diff_threads_for_saving,
                mappings_on_diff_ref_seqs);
            if (low_memory_mode_ &&
                num_mappings_in_mem > max_num_mappings_in_mem) {
              OutputTempMappings(num_reference_sequences, mapping_processor,
              OutputTempMappings(num_reference_sequences,
                                 mappings_on_diff_ref_seqs, mapping_processor,
                                 mapping_writer);
              num_mappings_in_mem = 0;
            }
@@ -1722,25 +1730,33 @@ void Chromap<MappingRecord>::MapSingleEndReads() {
  index.Destroy();

  if (low_memory_mode_) {
    // First, process the remaining mappings in the memory and save them on
    // disk.
    if (num_mappings_in_mem > 0) {
      OutputTempMappings(num_reference_sequences, mappings_on_diff_ref_seqs,
                         mapping_processor, mapping_writer);
      num_mappings_in_mem = 0;
    }

    ProcessAndOutputMappingsInLowMemory(num_mappings_in_mem,
                                        num_reference_sequences, reference,
                                        mapping_processor, mapping_writer);
  } else {
    if (Tn5_shift_) {
      mapping_processor.ApplyTn5ShiftOnMappings(num_reference_sequences,
                                                mappings_on_diff_ref_seqs_);
                                                mappings_on_diff_ref_seqs);
    }

    if (remove_pcr_duplicates_) {
      mapping_processor.RemovePCRDuplicate(num_reference_sequences,
                                           mappings_on_diff_ref_seqs_);
                                           mappings_on_diff_ref_seqs);
      std::cerr << "After removing PCR duplications, ";
      OutputMappingStatistics(num_reference_sequences,
                              mappings_on_diff_ref_seqs_,
                              mappings_on_diff_ref_seqs_);
                              mappings_on_diff_ref_seqs,
                              mappings_on_diff_ref_seqs);
    } else {
      mapping_processor.SortOutputMappings(num_reference_sequences,
                                           mappings_on_diff_ref_seqs_);
                                           mappings_on_diff_ref_seqs);
    }

    if (allocate_multi_mappings_) {
@@ -1748,16 +1764,16 @@ void Chromap<MappingRecord>::MapSingleEndReads() {
          num_mapped_reads_ - num_uniquely_mapped_reads_;
      mapping_processor.AllocateMultiMappings(
          num_reference_sequences, num_multi_mappings,
          multi_mapping_allocation_distance_, mappings_on_diff_ref_seqs_);
          multi_mapping_allocation_distance_, mappings_on_diff_ref_seqs);
      std::cerr << "After allocating multi-mappings, ";
      OutputMappingStatistics(num_reference_sequences,
                              mappings_on_diff_ref_seqs_,
                              mappings_on_diff_ref_seqs_);
                              mappings_on_diff_ref_seqs,
                              mappings_on_diff_ref_seqs);
      mapping_processor.SortOutputMappings(num_reference_sequences,
                                           mappings_on_diff_ref_seqs_);
                                           mappings_on_diff_ref_seqs);
    }
    OutputMappings(num_reference_sequences, reference,
                   mappings_on_diff_ref_seqs_, mapping_writer);
                   mappings_on_diff_ref_seqs, mapping_writer);
  }

  mapping_writer.FinalizeMappingOutput();
@@ -1821,15 +1837,16 @@ template <typename MappingRecord>
uint32_t Chromap<MappingRecord>::MoveMappingsInBuffersToMappingContainer(
    uint32_t num_reference_sequences,
    std::vector<std::vector<std::vector<MappingRecord>>>
        &mappings_on_diff_ref_seqs_for_diff_threads_for_saving) {
        &mappings_on_diff_ref_seqs_for_diff_threads_for_saving,
    std::vector<std::vector<MappingRecord>> &mappings_on_diff_ref_seqs) {
  // double real_start_time = Chromap<>::GetRealTime();
  uint32_t num_moved_mappings = 0;
  for (int ti = 0; ti < num_threads_; ++ti) {
    for (uint32_t i = 0; i < num_reference_sequences; ++i) {
      num_moved_mappings +=
          mappings_on_diff_ref_seqs_for_diff_threads_for_saving[ti][i].size();
      mappings_on_diff_ref_seqs_[i].insert(
          mappings_on_diff_ref_seqs_[i].end(),
      mappings_on_diff_ref_seqs[i].insert(
          mappings_on_diff_ref_seqs[i].end(),
          std::make_move_iterator(
              mappings_on_diff_ref_seqs_for_diff_threads_for_saving[ti][i]
                  .begin()),
+3 −2
Original line number Diff line number Diff line
@@ -183,7 +183,8 @@ class Chromap {
  uint32_t MoveMappingsInBuffersToMappingContainer(
      uint32_t num_reference_sequences,
      std::vector<std::vector<std::vector<MappingRecord> > >
          &mappings_on_diff_ref_seqs_for_diff_threads_for_saving);
          &mappings_on_diff_ref_seqs_for_diff_threads_for_saving,
      std::vector<std::vector<MappingRecord> > &mappings_on_diff_ref_seqs);

  void OutputBarcodeStatistics();

@@ -196,6 +197,7 @@ class Chromap {

  void OutputTempMappings(
      uint32_t num_reference_sequences,
      std::vector<std::vector<MappingRecord> > &mappings_on_diff_ref_seqs,
      const MappingProcessor<MappingRecord> &mapping_processor,
      MappingWriter<MappingRecord> &mapping_writer);

@@ -290,7 +292,6 @@ class Chromap {
  // For mapping
  int min_unique_mapping_mapq_ = 4;
  std::vector<TempMappingFileHandle<MappingRecord> > temp_mapping_file_handles_;
  std::vector<std::vector<MappingRecord> > mappings_on_diff_ref_seqs_;
  // For mapping stats.
  uint64_t num_candidates_ = 0;
  uint64_t num_mappings_ = 0;