Commit 4673c364 authored by mourisl's avatar mourisl Committed by Li Song
Browse files

Add the row that summarize the number of fragments that can not be barcode corrected

parent 5e67115d
Loading
Loading
Loading
Loading
+12 −1
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@
#include "temp_mapping.h"
#include "utils.h"

#define CHROMAP_VERSION "0.2.7-r492"
#define CHROMAP_VERSION "0.2.7-r493"

namespace chromap {

@@ -441,12 +441,18 @@ void Chromap::MapSingleEndReads() {
              mapping_writer.UpdateSummaryMetadata(0, SUMMARY_METADATA_TOTAL, 
                  num_loaded_reads) ;
            else {
              uint32_t nonwhitelist_count = 0;
              for (uint32_t read_index = 0; read_index < num_loaded_reads; ++read_index)
                if (read_map_summary[read_index] & 1) {
                  mapping_writer.UpdateSummaryMetadata(
                      barcode_batch.GenerateSeedFromSequenceAt(read_index, 0, barcode_length_), 
                      SUMMARY_METADATA_TOTAL, 1);
                } else {
                  ++nonwhitelist_count;
                }
              
              mapping_writer.UpdateSpeicalCategorySummaryMetadata(/*nonwhitelist*/0, 
                  SUMMARY_METADATA_TOTAL, nonwhitelist_count);
            }

            // By default, set the lowest bit to 1 (whether the barcode is in the whitelist)
@@ -1015,12 +1021,17 @@ void Chromap::MapPairedEndReads() {
              mapping_writer.UpdateSummaryMetadata(0, SUMMARY_METADATA_TOTAL, 
                  num_loaded_pairs) ;
            else {
              uint32_t nonwhitelist_count = 0 ;
              for (uint32_t pair_index = 0; pair_index < num_loaded_pairs; ++pair_index)
                if (read_map_summary[pair_index] & 1) {
                  mapping_writer.UpdateSummaryMetadata(
                      barcode_batch.GenerateSeedFromSequenceAt(pair_index, 0, barcode_length_), 
                      SUMMARY_METADATA_TOTAL, 1);
                } else {
                  ++nonwhitelist_count ;
                }
              mapping_writer.UpdateSpeicalCategorySummaryMetadata(/*nonwhitelist*/0, 
                  SUMMARY_METADATA_TOTAL, nonwhitelist_count);
            }
            
            memset(read_map_summary, 1, sizeof(*read_map_summary)*read_batch_size_);
+14 −5
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ class MappingWriter {

  void OutputSummaryMetadata();
  void UpdateSummaryMetadata(uint64_t barcode, int type, int change);
  void UpdateSpeicalCategorySummaryMetadata(int category, int type, int change);
  void AdjustSummaryPairedEndOverCount();

 protected:
@@ -446,20 +447,28 @@ template <typename MappingRecord>
void MappingWriter<MappingRecord>::OutputSummaryMetadata() {
  if (!mapping_parameters_.summary_metadata_file_path.empty())
  {
    summary_metadata_.Output(mapping_parameters_.summary_metadata_file_path.c_str());
    summary_metadata_.Output(mapping_parameters_.summary_metadata_file_path.c_str(),
        !mapping_parameters_.barcode_whitelist_file_path.empty() && !mapping_parameters_.output_mappings_not_in_whitelist);
  }
}

template <typename MappingRecord>
  void MappingWriter<MappingRecord>::UpdateSummaryMetadata(uint64_t barcode, int type, int change)
{
  void MappingWriter<MappingRecord>::UpdateSummaryMetadata(uint64_t barcode, int type, int change) {
  if (!mapping_parameters_.summary_metadata_file_path.empty())
    summary_metadata_.UpdateCount(barcode, type, change);
}

// category: 0: non-whitelist barcode
template <typename MappingRecord>
  void MappingWriter<MappingRecord>::AdjustSummaryPairedEndOverCount()
{
  void MappingWriter<MappingRecord>::UpdateSpeicalCategorySummaryMetadata(int category, int type, int change) {
  if (!mapping_parameters_.summary_metadata_file_path.empty()) {
    if (category == 0)
      summary_metadata_.UpdateNonWhitelistCount(type, change);
  }
}

template <typename MappingRecord>
  void MappingWriter<MappingRecord>::AdjustSummaryPairedEndOverCount() {
  if (!mapping_parameters_.summary_metadata_file_path.empty()
      && mapping_parameters_.mapping_output_format == MAPPINGFORMAT_SAM)
      summary_metadata_.AdjustPairedEndOverCount() ; 
+24 −11
Original line number Diff line number Diff line
@@ -41,22 +41,30 @@ class SummaryMetadata {
    kh_destroy(k64_barcode_metadata, barcode_metadata_);
  }

  void Output(const char *filename) {
  void OutputCounts(const char *barcode, const int *counts, FILE *fp)
  {
    fprintf(fp, "%s", barcode) ;
    int i ;
    for (i = 0; i < SUMMARY_METADATA_FIELDS; ++i) {
      if (i != SUMMARY_METADATA_MAPPED)
        fprintf(fp, ",%d", counts[i]);
      else // The output in the summary is for unmapped reads
        fprintf(fp, ",%d", counts[SUMMARY_METADATA_TOTAL] - counts[SUMMARY_METADATA_MAPPED]) ;
    }
    fprintf(fp, "\n");
  }

  void Output(const char *filename, bool has_white_list) {
    FILE *fp = fopen(filename, "w");
    fprintf(fp, "barcode,total,duplicate,unmapped,lowmapq\n");   
    khiter_t k;
    for (k = kh_begin(barcode_metadata_); k != kh_end(barcode_metadata_); ++k)
      if (kh_exist(barcode_metadata_, k)) {
        fprintf(fp, "%s", Seed2Sequence(kh_key(barcode_metadata_, k), barcode_length_).c_str());
        int i;
        for (i = 0; i < SUMMARY_METADATA_FIELDS; ++i) {
          if (i != SUMMARY_METADATA_MAPPED)
            fprintf(fp, ",%d", kh_value(barcode_metadata_, k).counts[i]);
          else
            fprintf(fp, ",%d", kh_value(barcode_metadata_, k).counts[SUMMARY_METADATA_TOTAL]
                - kh_value(barcode_metadata_, k).counts[SUMMARY_METADATA_MAPPED]);
        OutputCounts(Seed2Sequence(kh_key(barcode_metadata_, k), barcode_length_).c_str(),
            kh_value(barcode_metadata_, k).counts, fp) ;
      }
        fprintf(fp, "\n");
    if (has_white_list) {
      OutputCounts("non-whitelist", nonwhitelist_summary_.counts, fp) ;
    }
    fclose(fp);
  }
@@ -71,6 +79,10 @@ class SummaryMetadata {
    kh_value(barcode_metadata_, barcode_metadata_iter).counts[type] += change;
  }

  void UpdateNonWhitelistCount(int type, int change) {
    nonwhitelist_summary_.counts[type] += change;
  }

  void SetBarcodeLength(int l) {
    barcode_length_ = l;
  }
@@ -88,6 +100,7 @@ class SummaryMetadata {

 private:
  khash_t(k64_barcode_metadata) *barcode_metadata_;    
  struct _barcodeSummaryMetadata nonwhitelist_summary_;  // summarize the fragments with no barcode information 
  int barcode_length_;

  std::string Seed2Sequence(uint64_t seed, uint32_t seed_length) const {