Commit 63b06d3b authored by oma219's avatar oma219 Committed by Li Song
Browse files

added option for frip est. params/compute est. frip for every barcode/include in summary file

parent 083a20c4
Loading
Loading
Loading
Loading
+27 −2
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@
#include <tuple>
#include <vector>

#include <sstream> // Used for frip est params splitting

#include "candidate_processor.h"
#include "cxxopts.hpp"
#include "draft_mapping_generator.h"
@@ -633,6 +635,29 @@ void Chromap::MapPairedEndReads() {
  
  std::vector<uint64_t> seeds_for_batch(500000, 0);

  // Parse out the parameters for chromap score (const, fric, dup, unmapped, lowmapq)
  std::vector<double> frip_est_params; 
  std::stringstream ss(mapping_parameters_.frip_est_params);
  std::string token;

  while(std::getline(ss, token, ';')) {
    try {
      auto curr_param = std::stod(token);
      frip_est_params.push_back(curr_param);
    } catch(...) {
      chromap::ExitWithMessage(
        "\nException occurred while processing chromap score parameters\n"
        );
    }
  }
  if (frip_est_params.size() != 5) {
    chromap::ExitWithMessage(
      "\nInvalid number of parameters, expecting 5 parameters but found " 
      + std::to_string(frip_est_params.size()) 
      + " parameters\n"
      );
  }

  // Initialize cache
  mm_cache mm_to_candidates_cache(mapping_parameters_.cache_size);
  mm_to_candidates_cache.SetKmerLength(kmer_size);
@@ -1261,8 +1286,8 @@ void Chromap::MapPairedEndReads() {
  }
  if (mapping_parameters_.mapping_output_format == MAPPINGFORMAT_SAM)
    mapping_writer.AdjustSummaryPairedEndOverCount() ;
  mapping_writer.OutputSummaryMetadata();
  
  mapping_writer.OutputSummaryMetadata(frip_est_params);
  reference.FinalizeLoading();
  
  std::cerr << "Total time: " << GetRealTime() - real_start_time << "s.\n";
+6 −1
Original line number Diff line number Diff line
@@ -80,7 +80,9 @@ void AddMappingOptions(cxxopts::Options &options) {
      ("cache-size", "number of cache entries [4000003]", cxxopts::value<int>(), "INT")
      ("cache-update-param", "value used to control number of reads sampled [0.01]", cxxopts::value<double>(), "FLT")
      ("use-all-reads", "use all reads for cache")
      ("debug-cache", "verbose output for debugging cache used in chromap");
      ("debug-cache", "verbose output for debugging cache used in chromap")
      ("frip-est-params", "coefficients used for chromap score calculation separated by semi-colons",
      cxxopts::value<std::string>(), "STR");
}

void AddInputOptions(cxxopts::Options &options) {
@@ -349,6 +351,9 @@ void ChromapDriver::ParseArgsAndRun(int argc, char *argv[]) {
  if (result.count("debug-cache")) {
    mapping_parameters.debug_cache = true;
  }
  if (result.count("frip-est-params")) {
    mapping_parameters.frip_est_params = result["frip-est-params"].as<std::string>();
  }



+1 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ struct MappingParameters {
  int cache_size = 4000003;
  bool use_all_reads = false;
  bool debug_cache = false;
  std::string frip_est_params = "0.2809;0.8921;7.490e-06;-1.893e-05;-2.178e-05";

  // Read with # best mappings greater than it will have this number of best
  // mappings reported.
+7 −4
Original line number Diff line number Diff line
@@ -68,7 +68,7 @@ class MappingWriter {
      std::vector<TempMappingFileHandle<MappingRecord>>
          &temp_mapping_file_handles);

  void OutputSummaryMetadata();
  void OutputSummaryMetadata(std::vector<double> frip_est_coeffs = {0.0, 0.0, 0.0, 0.0, 0.0});
  void UpdateSummaryMetadata(uint64_t barcode, int type, int change);
  void UpdateSpeicalCategorySummaryMetadata(int category, int type, int change);
  void AdjustSummaryPairedEndOverCount();
@@ -444,11 +444,14 @@ void MappingWriter<MappingRecord>::OutputMappings(
}

template <typename MappingRecord>
void MappingWriter<MappingRecord>::OutputSummaryMetadata() {
void MappingWriter<MappingRecord>::OutputSummaryMetadata(std::vector<double> frip_est_coeffs) {
  if (!mapping_parameters_.summary_metadata_file_path.empty())
  {
    summary_metadata_.Output(mapping_parameters_.summary_metadata_file_path.c_str(),
        !mapping_parameters_.barcode_whitelist_file_path.empty() && !mapping_parameters_.output_mappings_not_in_whitelist);
    summary_metadata_.Output(
        mapping_parameters_.summary_metadata_file_path.c_str(),
        !mapping_parameters_.barcode_whitelist_file_path.empty() && !mapping_parameters_.output_mappings_not_in_whitelist,
        frip_est_coeffs
        );
  }
}

+22 −8
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@ class SummaryMetadata {
    kh_destroy(k64_barcode_metadata, barcode_metadata_);
  }

  inline void OutputCounts(const char *barcode, const int *counts, FILE *fp)
  inline void OutputCounts(const char *barcode, const int *counts, FILE *fp, std::vector<double> frip_est_coeffs)
  {
    // define variables to store values
    size_t num_total = counts[SUMMARY_METADATA_TOTAL];
@@ -55,31 +55,45 @@ class SummaryMetadata {
    size_t num_cachehit = counts[SUMMARY_METADATA_CACHEHIT];
    double fric = (double) num_cachehit / (double) num_mapped;

    // compute the estimated frip
    double est_frip = frip_est_coeffs[0] + /* constant */
                      (frip_est_coeffs[1] * fric) +
                      (frip_est_coeffs[2] * num_dup) +
                      (frip_est_coeffs[3] * num_unmapped)  +
                      (frip_est_coeffs[4] * num_lowmapq);

    // print barcode as string
    fprintf(fp, "%s,%ld,%ld,%ld,%ld,%ld,%.5lf\n", 
    fprintf(fp, "%s,%ld,%ld,%ld,%ld,%ld,%.5lf,%.5lf\n", 
            barcode,
            num_total,
            num_dup,
            num_unmapped,
            num_lowmapq,
            num_cachehit,
            fric);
            fric,
            est_frip);
  }

  void Output(const char *filename, bool has_white_list) {
  void Output(const char *filename, bool has_white_list, std::vector<double> frip_est_coeffs) {
    FILE *fp = fopen(filename, "w");
    fprintf(fp, "barcode,total,duplicate,unmapped,lowmapq,cachehit,fric\n");   
    fprintf(fp, "barcode,total,duplicate,unmapped,lowmapq,cachehit,fric,estfrip\n");   
    khiter_t k;
    for (k = kh_begin(barcode_metadata_); k != kh_end(barcode_metadata_); ++k)
      if (kh_exist(barcode_metadata_, k)) {
        OutputCounts(
                    Seed2Sequence(kh_key(barcode_metadata_, k), barcode_length_).c_str(),
                    kh_value(barcode_metadata_, k).counts, 
                    fp
                    fp,
                    frip_est_coeffs
                    );
      }
    if (has_white_list) {
      OutputCounts("non-whitelist", nonwhitelist_summary_.counts, fp) ;
      OutputCounts(
                   "non-whitelist", 
                   nonwhitelist_summary_.counts, 
                   fp,
                   frip_est_coeffs
                   ) ;
    }
    fclose(fp);
  }