Commit 267c10c2 authored by Haowen Zhang's avatar Haowen Zhang Committed by Li Song
Browse files

Move the feature matrix functions out of OutputTools.

Note that the feature matrix related code will not work for now.
parent 1cba6dfc
Loading
Loading
Loading
Loading
+13 −11
Original line number Diff line number Diff line
@@ -420,8 +420,8 @@ uint32_t Chromap<PairedEndMappingWithBarcode>::CallPeaks(
        peaks_on_diff_ref_seqs_[ri].emplace_back(
            Peak{peak_start_position, peak_length, peak_count});
        tree_extras_on_diff_ref_seqs_[ri].emplace_back(0);
        output_tools_.OutputPeaks(peak_start_position, peak_length, ri,
                                  reference);
        feature_barcode_matrix_writer_.OutputPeaks(peak_start_position,
                                                   peak_length, ri, reference);
        ++peak_count;
        peak_length = 0;
      }
@@ -443,7 +443,8 @@ void Chromap<PairedEndMappingWithBarcode>::OutputFeatureMatrix(
    uint32_t num_sequences, const SequenceBatch &reference) {
  uint32_t num_peaks = 0;
  if (cell_by_bin_) {
    output_tools_.OutputPeaks(bin_size_, num_sequences, reference);
    feature_barcode_matrix_writer_.OutputPeaks(bin_size_, num_sequences,
                                               reference);
    for (uint32_t i = 0; i < num_sequences; ++i) {
      uint32_t ref_seq_length = reference.GetSequenceLengthAt(i);
      num_peaks += ref_seq_length / bin_size_;
@@ -475,7 +476,7 @@ void Chromap<PairedEndMappingWithBarcode>::OutputFeatureMatrix(
        kh_value(barcode_index_table_, barcode_index_table_iterator) =
            barcode_index;
        ++barcode_index;
        output_tools_.AppendBarcodeOutput(barcode_key);
        feature_barcode_matrix_writer_.AppendBarcodeOutput(barcode_key);
      }
    }
  }
@@ -522,8 +523,8 @@ void Chromap<PairedEndMappingWithBarcode>::OutputFeatureMatrix(
            << Chromap<>::GetRealTime() - real_start_time << "s.\n";
  // Output matrix
  real_start_time = GetRealTime();
  output_tools_.WriteMatrixOutputHead(num_peaks, kh_size(barcode_index_table_),
                                      kh_size(matrix));
  feature_barcode_matrix_writer_.WriteMatrixOutputHead(
      num_peaks, kh_size(barcode_index_table_), kh_size(matrix));
  uint64_t key;
  uint32_t value;
  std::vector<std::pair<uint64_t, uint32_t>> feature_matrix;
@@ -535,9 +536,9 @@ void Chromap<PairedEndMappingWithBarcode>::OutputFeatureMatrix(
  kh_destroy(kmatrix, matrix);
  std::sort(feature_matrix.begin(), feature_matrix.end());
  for (size_t i = 0; i < feature_matrix.size(); ++i) {
    output_tools_.AppendMatrixOutput((uint32_t)feature_matrix[i].first,
                                     (uint32_t)(feature_matrix[i].first >> 32),
                                     feature_matrix[i].second);
    feature_barcode_matrix_writer_.AppendMatrixOutput(
        (uint32_t)feature_matrix[i].first,
        (uint32_t)(feature_matrix[i].first >> 32), feature_matrix[i].second);
  }
  std::cerr << "Output feature matrix in "
            << Chromap<>::GetRealTime() - real_start_time << "s.\n";
@@ -1812,9 +1813,10 @@ void Chromap<MappingRecord>::MapPairedEndReads() {
    }

    if (!is_bulk_data_ && !matrix_output_prefix_.empty()) {
      output_tools_.InitializeMatrixOutput(matrix_output_prefix_);
      feature_barcode_matrix_writer_.InitializeMatrixOutput(
          matrix_output_prefix_);
      OutputFeatureMatrix(num_reference_sequences, reference);
      output_tools_.FinalizeMatrixOutput();
      feature_barcode_matrix_writer_.FinalizeMatrixOutput();
    }
  }

+2 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include <tuple>
#include <vector>

#include "feature_barcode_matrix_writer.h"
#include "index.h"
#include "khash.h"
#include "ksort.h"
@@ -620,6 +621,7 @@ class Chromap {
  // (max_level, # nodes)
  std::vector<std::pair<int, uint32_t> > tree_info_on_diff_ref_seqs_;
  OutputTools<MappingRecord> output_tools_;
  FeatureBarcodeMatrixWriter feature_barcode_matrix_writer_;
  // For mapping stats.
  uint64_t num_candidates_ = 0;
  uint64_t num_mappings_ = 0;
+101 −0
Original line number Diff line number Diff line
#ifndef FEATUREBARCODEMATRIXWRITER_H_
#define FEATUREBARCODEMATRIXWRITER_H_

#include <assert.h>

#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

#include "barcode_translator.h"
#include "sequence_batch.h"

namespace chromap {

// The code here is not working properly since the barcode length is not set.
// But this feature is not used in the release for now so this is fine.
// Writes the three text files that make up a feature-barcode matrix:
//   <prefix>_matrix.mtx   - one "peak<TAB>barcode<TAB>count" line per entry,
//   <prefix>_peaks.bed    - one line per peak/bin,
//   <prefix>_barcode.tsv  - one line per barcode.
//
// Expected call order: InitializeMatrixOutput(), then any number of
// OutputPeaks/AppendBarcodeOutput/WriteMatrixOutputHead/AppendMatrixOutput
// calls, then FinalizeMatrixOutput(). The output functions require that
// InitializeMatrixOutput() has been called first.
class FeatureBarcodeMatrixWriter {
 public:
  FeatureBarcodeMatrixWriter() {}
  // The destructor intentionally does not close the files: the class is
  // copyable, so closing here could close the same FILE* twice. Call
  // FinalizeMatrixOutput() explicitly when done.
  ~FeatureBarcodeMatrixWriter() {}

  // Opens the three output files for writing, truncating existing ones.
  // Prints a message to stderr and terminates the program if any file cannot
  // be opened. This check is done at runtime (not with assert) so that builds
  // with NDEBUG do not go on to write through a null FILE*.
  inline void InitializeMatrixOutput(const std::string &matrix_output_prefix) {
    // Close handles left from a previous initialization so they do not leak.
    FinalizeMatrixOutput();
    matrix_output_prefix_ = matrix_output_prefix;
    matrix_output_file_ = OpenOutputFile(matrix_output_prefix_ + "_matrix.mtx");
    peak_output_file_ = OpenOutputFile(matrix_output_prefix_ + "_peaks.bed");
    barcode_output_file_ =
        OpenOutputFile(matrix_output_prefix_ + "_barcode.tsv");
  }

  // Writes one peak line per fixed-size bin for each of the first
  // num_sequences sequences in reference. Each line is
  // "name<TAB>position + 1<TAB>position + bin_size", with position stepping
  // by bin_size from 0 while it is below the sequence length.
  // NOTE(review): the end of the last bin is not clamped to the sequence
  // length - confirm whether downstream consumers expect that.
  void OutputPeaks(uint32_t bin_size, uint32_t num_sequences,
                   const SequenceBatch &reference) {
    if (bin_size == 0) {
      // A zero step would never advance position and loop forever.
      return;
    }
    for (uint32_t rid = 0; rid < num_sequences; ++rid) {
      uint32_t sequence_length = reference.GetSequenceLengthAt(rid);
      const char *sequence_name = reference.GetSequenceNameAt(rid);
      for (uint32_t position = 0; position < sequence_length;
           position += bin_size) {
        fprintf(peak_output_file_, "%s\t%u\t%u\n", sequence_name, position + 1,
                position + bin_size);
      }
    }
  }

  // Writes a single peak line for sequence rid:
  // "name<TAB>peak_start_position + 1<TAB>peak_start_position + peak_length".
  void OutputPeaks(uint32_t peak_start_position, uint16_t peak_length,
                   uint32_t rid, const SequenceBatch &reference) {
    const char *sequence_name = reference.GetSequenceNameAt(rid);
    fprintf(peak_output_file_, "%s\t%u\t%u\n", sequence_name,
            peak_start_position + 1, peak_start_position + peak_length);
  }

  // Appends one barcode line: the translation of barcode_key (using the
  // current barcode length, 16 unless SetBarcodeLength() was called) followed
  // by the literal suffix "-1".
  void AppendBarcodeOutput(uint64_t barcode_key) {
    fprintf(barcode_output_file_, "%s-1\n",
            barcode_translator_.Translate(barcode_key, cell_barcode_length_)
                .data());
  }

  // Writes the matrix header line: the three counts separated by tabs.
  void WriteMatrixOutputHead(uint64_t num_peaks, uint64_t num_barcodes,
                             uint64_t num_lines) {
    fprintf(matrix_output_file_, "%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\n",
            num_peaks, num_barcodes, num_lines);
  }

  // Appends one matrix entry line: the three values separated by tabs.
  void AppendMatrixOutput(uint32_t peak_index, uint32_t barcode_index,
                          uint32_t num_mappings) {
    fprintf(matrix_output_file_, "%u\t%u\t%u\n", peak_index, barcode_index,
            num_mappings);
  }

  // Closes whichever output files are open. Safe to call more than once or
  // before InitializeMatrixOutput(): handles are reset to nullptr after being
  // closed, so no FILE* is ever passed to fclose twice.
  inline void FinalizeMatrixOutput() {
    CloseOutputFile(matrix_output_file_);
    CloseOutputFile(peak_output_file_);
    CloseOutputFile(barcode_output_file_);
  }

  // Forwards the translation table file to the barcode translator.
  inline void SetBarcodeTranslateTable(const std::string &file) {
    barcode_translator_.SetTranslateTable(file);
  }

  // Sets the barcode length passed to the translator by AppendBarcodeOutput().
  inline void SetBarcodeLength(uint32_t cell_barcode_length) {
    cell_barcode_length_ = cell_barcode_length;
  }

 protected:
  uint32_t cell_barcode_length_ = 16;
  std::string matrix_output_prefix_;
  FILE *peak_output_file_ = nullptr;
  FILE *barcode_output_file_ = nullptr;
  FILE *matrix_output_file_ = nullptr;
  BarcodeTranslator barcode_translator_;

 private:
  // Opens path for writing; reports the failure and exits if it cannot.
  static FILE *OpenOutputFile(const std::string &path) {
    FILE *file = fopen(path.c_str(), "w");
    if (file == nullptr) {
      std::cerr << "Failed to open " << path << " for writing.\n";
      std::exit(EXIT_FAILURE);
    }
    return file;
  }

  // Closes file if it is open and resets the handle to nullptr.
  static void CloseOutputFile(FILE *&file) {
    if (file != nullptr) {
      fclose(file);
      file = nullptr;
    }
  }
};

}  // namespace chromap

#endif  // FEATUREBARCODEMATRIXWRITER_H_
+1 −63
Original line number Diff line number Diff line
@@ -10,13 +10,13 @@
#include <string>
#include <vector>

#include "barcode_translator.h"
#include "bed_mapping.h"
#include "mapping.h"
#include "paf_mapping.h"
#include "pairs_mapping.h"
#include "sam_mapping.h"
#include "sequence_batch.h"
#include "barcode_translator.h"

namespace chromap {

@@ -100,64 +100,6 @@ class OutputTools {
    return sequence;
  }

  // Below are functions to output feature matrix.
  inline void InitializeMatrixOutput(const std::string &matrix_output_prefix) {
    matrix_output_prefix_ = matrix_output_prefix;
    matrix_output_file_ =
        fopen((matrix_output_prefix_ + "_matrix.mtx").c_str(), "w");
    assert(matrix_output_file_ != NULL);
    peak_output_file_ =
        fopen((matrix_output_prefix_ + "_peaks.bed").c_str(), "w");
    assert(peak_output_file_ != NULL);
    barcode_output_file_ =
        fopen((matrix_output_prefix_ + "_barcode.tsv").c_str(), "w");
    assert(barcode_output_file_ != NULL);
  }

  void OutputPeaks(uint32_t bin_size, uint32_t num_sequences,
                   const SequenceBatch &reference) {
    for (uint32_t rid = 0; rid < num_sequences; ++rid) {
      uint32_t sequence_length = reference.GetSequenceLengthAt(rid);
      const char *sequence_name = reference.GetSequenceNameAt(rid);
      for (uint32_t position = 0; position < sequence_length;
           position += bin_size) {
        fprintf(peak_output_file_, "%s\t%u\t%u\n", sequence_name, position + 1,
                position + bin_size);
      }
    }
  }

  void OutputPeaks(uint32_t peak_start_position, uint16_t peak_length,
                   uint32_t rid, const SequenceBatch &reference) {
    const char *sequence_name = reference.GetSequenceNameAt(rid);
    fprintf(peak_output_file_, "%s\t%u\t%u\n", sequence_name,
            peak_start_position + 1, peak_start_position + peak_length);
  }

  void AppendBarcodeOutput(uint64_t barcode_key) {
    fprintf(barcode_output_file_, "%s-1\n",
            barcode_translator_.Translate(barcode_key, cell_barcode_length_).data());
            //Seed2Sequence(barcode_key, cell_barcode_length_).data());
  }

  void WriteMatrixOutputHead(uint64_t num_peaks, uint64_t num_barcodes,
                             uint64_t num_lines) {
    fprintf(matrix_output_file_, "%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\n",
            num_peaks, num_barcodes, num_lines);
  }

  void AppendMatrixOutput(uint32_t peak_index, uint32_t barcode_index,
                          uint32_t num_mappings) {
    fprintf(matrix_output_file_, "%u\t%u\t%u\n", peak_index, barcode_index,
            num_mappings);
  }

  inline void FinalizeMatrixOutput() {
    fclose(matrix_output_file_);
    fclose(peak_output_file_);
    fclose(barcode_output_file_);
  }

  inline void SetPairsCustomRidRank(const std::vector<int> &custom_rid_rank) {
    custom_rid_rank_ = custom_rid_rank;
  }
@@ -175,10 +117,6 @@ class OutputTools {
  MappingOutputFormat mapping_output_format_ = MAPPINGFORMAT_BED;
  uint32_t num_mappings_;
  uint32_t cell_barcode_length_ = 16;
  std::string matrix_output_prefix_;
  FILE *peak_output_file_;
  FILE *barcode_output_file_;
  FILE *matrix_output_file_;
  BarcodeTranslator barcode_translator_;
};