Commit 8674acf1 authored by Li's avatar Li Committed by Li Song
Browse files

Implement the count for total and unmapped.

parent 1a537f74
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
@@ -425,6 +425,18 @@ void Chromap::MapSingleEndReads() {
          }
          // std::cerr<<"cache memusage: " <<
          // mm_to_candidates_cache.GetMemoryBytes() <<"\n" ;
          if (!mapping_parameters_.summary_metadata_file_path.empty()) {
            if (mapping_parameters_.is_bulk_data) 
              mapping_writer.UpdateSummaryMetadata(0, SUMMARY_METADATA_TOTAL, 
                  num_loaded_reads) ;
            else
            {
              for (uint32_t read_index = 0; read_index < num_loaded_reads; ++read_index)
                mapping_writer.UpdateSummaryMetadata(
                    barcode_batch.GenerateSeedFromSequenceAt(read_index, 0, barcode_length_), 
                    SUMMARY_METADATA_TOTAL, 1);
            }
          }
          num_loaded_reads = num_loaded_reads_for_loading;
          read_batch_for_loading.SwapSequenceBatch(read_batch);
          barcode_batch_for_loading.SwapSequenceBatch(barcode_batch);
@@ -964,6 +976,19 @@ void Chromap::MapPairedEndReads() {
            }
          }

          if (!mapping_parameters_.summary_metadata_file_path.empty()) {
            if (mapping_parameters_.is_bulk_data) 
              mapping_writer.UpdateSummaryMetadata(0, SUMMARY_METADATA_TOTAL, 
                  num_loaded_pairs) ;
            else
            {
              for (uint32_t pair_index = 0; pair_index < num_loaded_pairs; ++pair_index)
                mapping_writer.UpdateSummaryMetadata(
                    barcode_batch.GenerateSeedFromSequenceAt(pair_index, 0, barcode_length_), 
                    SUMMARY_METADATA_TOTAL, 1);
            }
          }

          std::cerr << "Mapped " << num_loaded_pairs << " read pairs in "
                    << GetRealTime() - real_batch_start_time << "s.\n";
          real_batch_start_time = GetRealTime();
+30 −1
Original line number Diff line number Diff line
@@ -69,6 +69,7 @@ class MappingWriter {
          &temp_mapping_file_handles);

  void OutputSummaryMetadata();
  void UpdateSummaryMetadata(uint64_t barcode, int type, int change);

 protected:
  void AppendMapping(uint32_t rid, const SequenceBatch &reference,
@@ -288,8 +289,13 @@ void MappingWriter<MappingRecord>::ProcessAndOutputMappingsInLowMemory(
        } else {
          if (!mapping_parameters_.summary_metadata_file_path.empty())
            summary_metadata_.UpdateCount(last_mapping.GetBarcode(), SUMMARY_METADATA_LOWMAPQ, 
                last_mapping.num_dups_);
                std::min((uint32_t)std::numeric_limits<uint8_t>::max(),
                                         num_last_mapping_dups));
        }
        if (!mapping_parameters_.summary_metadata_file_path.empty())
          summary_metadata_.UpdateCount(last_mapping.GetBarcode(), SUMMARY_METADATA_MAPPED, 
              std::min((uint32_t)std::numeric_limits<uint8_t>::max(),
                num_last_mapping_dups));

        if (last_mapping.is_unique_ == 1) {
          ++num_uni_mappings;
@@ -327,7 +333,20 @@ void MappingWriter<MappingRecord>::ProcessAndOutputMappingsInLowMemory(
    }
    AppendMapping(last_rid, reference, last_mapping);
    ++num_mappings_passing_filters;
    
    if (!mapping_parameters_.summary_metadata_file_path.empty())
      summary_metadata_.UpdateCount(last_mapping.GetBarcode(), SUMMARY_METADATA_DUP,
          last_mapping.num_dups_ - 1);
  } else {
    if (!mapping_parameters_.summary_metadata_file_path.empty())
      summary_metadata_.UpdateCount(last_mapping.GetBarcode(), SUMMARY_METADATA_LOWMAPQ, 
          std::min((uint32_t)std::numeric_limits<uint8_t>::max(),
                                   num_last_mapping_dups));
  }
  if (!mapping_parameters_.summary_metadata_file_path.empty())
    summary_metadata_.UpdateCount(last_mapping.GetBarcode(), SUMMARY_METADATA_MAPPED, 
        std::min((uint32_t)std::numeric_limits<uint8_t>::max(),
          num_last_mapping_dups));

  if (last_mapping.is_unique_ == 1) {
    ++num_uni_mappings;
@@ -399,6 +418,9 @@ void MappingWriter<MappingRecord>::OutputMappingsInVector(
          summary_metadata_.UpdateCount(it->GetBarcode(), SUMMARY_METADATA_LOWMAPQ,
              it->num_dups_);
      }
      if (!mapping_parameters_.summary_metadata_file_path.empty())
        summary_metadata_.UpdateCount(it->GetBarcode(), SUMMARY_METADATA_MAPPED,
            it->num_dups_);
    }
  }
  std::cerr << "Number of output mappings (passed filters): "
@@ -421,6 +443,13 @@ void MappingWriter<MappingRecord>::OutputSummaryMetadata() {
    summary_metadata_.Output(mapping_parameters_.summary_metadata_file_path.c_str());
}

// Forwards a per-barcode counter update to summary_metadata_.UpdateCount().
//
// barcode: key identifying the barcode whose counter is updated.
// type:    counter selector, passed through unchanged (callers pass
//          SUMMARY_METADATA_* values).
// change:  amount handed to UpdateCount() for that counter.
//
// The call is a no-op when mapping_parameters_.summary_metadata_file_path is
// empty, so callers do not have to repeat that check themselves.
template <typename MappingRecord>
void MappingWriter<MappingRecord>::UpdateSummaryMetadata(uint64_t barcode,
                                                         int type,
                                                         int change) {
  const bool summary_output_disabled =
      mapping_parameters_.summary_metadata_file_path.empty();
  if (summary_output_disabled) {
    return;
  }

  summary_metadata_.UpdateCount(barcode, type, change);
}

// Specialization for BED format.
template <>
void MappingWriter<MappingWithBarcode>::OutputHeader(
+6 −2
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ namespace chromap {
// Indexes of the per-barcode counters stored in the `counts` array.
// SUMMARY_METADATA_FIELDS comes last and is used as the loop bound when the
// counters are written out, one column per field.
// NOTE(review): this listing appears to show both sides of a diff hunk
// (SUMMARY_METADATA_UNMAPPED and SUMMARY_METADATA_MAPPED); confirm which of
// the two is present in the current header before relying on the numeric
// values of the later enumerators.
enum SummaryMetadataField {
  SUMMARY_METADATA_TOTAL = 0,
  SUMMARY_METADATA_DUP,
  SUMMARY_METADATA_UNMAPPED,
  // In the output loop this column is printed as
  // counts[SUMMARY_METADATA_TOTAL] - counts[SUMMARY_METADATA_MAPPED].
  SUMMARY_METADATA_MAPPED,
  SUMMARY_METADATA_LOWMAPQ,
  SUMMARY_METADATA_FIELDS
};
@@ -50,7 +50,11 @@ class SummaryMetadata {
        fprintf(fp, "%s", Seed2Sequence(kh_key(barcode_metadata_, k), barcode_length_).c_str());
        int i;
        for (i = 0; i < SUMMARY_METADATA_FIELDS; ++i) {
          if (i != SUMMARY_METADATA_MAPPED)
            fprintf(fp, ",%d", kh_value(barcode_metadata_, k).counts[i]);
          else
            fprintf(fp, ",%d", kh_value(barcode_metadata_, k).counts[SUMMARY_METADATA_TOTAL]
                - kh_value(barcode_metadata_, k).counts[SUMMARY_METADATA_MAPPED]);
        }
        fprintf(fp, "\n");
      }