Commit f1027656 authored by Haowen Zhang's avatar Haowen Zhang Committed by Haowen Zhang
Browse files

Output seq and qual in SAM

parent c5947019
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -1759,8 +1759,8 @@ void Chromap<MappingRecord>::ProcessBestMappingsForPairedEndReadOnOneDirection(D
				  }
				//printf("%d %d\n", ref_start_position1, ref_end_position1);
				//printf("%d %d\n", ref_start_position2, ref_end_position2);
				  EmplaceBackMappingRecord(read_id, read1_name, barcode_key, 1, ref_start_position1, rid1, flag1, first_read_direction == kPositive ? 1 : 0, is_unique, mapq, NM1, n_cigar1, cigar1, MD_tag1, &((*mappings_on_diff_ref_seqs)[rid1])); 
				  EmplaceBackMappingRecord(read_id, read2_name, barcode_key, 1, ref_start_position2, rid2, flag2, second_read_direction == kPositive ? 1 : 0, is_unique, mapq, NM2, n_cigar2, cigar2, MD_tag2, &((*mappings_on_diff_ref_seqs)[rid2])); 
				  EmplaceBackMappingRecord(read_id, read1_name, barcode_key, 1, ref_start_position1, rid1, flag1, first_read_direction == kPositive ? 1 : 0, is_unique, mapq, NM1, n_cigar1, cigar1, MD_tag1, read1, read_batch1.GetSequenceQualAt(pair_index), &((*mappings_on_diff_ref_seqs)[rid1])); 
				  EmplaceBackMappingRecord(read_id, read2_name, barcode_key, 1, ref_start_position2, rid2, flag2, second_read_direction == kPositive ? 1 : 0, is_unique, mapq, NM2, n_cigar2, cigar2, MD_tag2, read2, read_batch2.GetSequenceQualAt(pair_index), &((*mappings_on_diff_ref_seqs)[rid2])); 
				} else if (output_mapping_in_pairs_) {
					int position1 = ref_start_position1;
					int position2 = ref_start_position2;
@@ -2135,12 +2135,12 @@ void Chromap<PAFMapping>::EmplaceBackMappingRecord(uint32_t read_id, const char
}

template<typename MappingRecord>
void Chromap<MappingRecord>::EmplaceBackMappingRecord(uint32_t read_id, const char *read_name, uint32_t cell_barcode, uint8_t num_dups, int64_t position, int rid, int flag, uint8_t direction, uint8_t is_unique, uint8_t mapq, uint32_t NM, int n_cigar, uint32_t *cigar, std::string &MD_tag, std::vector<MappingRecord> *mappings_on_diff_ref_seqs) {
void Chromap<MappingRecord>::EmplaceBackMappingRecord(uint32_t read_id, const char *read_name, uint32_t cell_barcode, uint8_t num_dups, int64_t position, int rid, int flag, uint8_t direction, uint8_t is_unique, uint8_t mapq, uint32_t NM, int n_cigar, uint32_t *cigar, std::string &MD_tag, const char* read, const char* read_qual, std::vector<MappingRecord> *mappings_on_diff_ref_seqs) {
}

template<>
void Chromap<SAMMapping>::EmplaceBackMappingRecord(uint32_t read_id, const char *read_name, uint32_t cell_barcode, uint8_t num_dups, int64_t position, int rid, int flag, uint8_t direction, uint8_t is_unique, uint8_t mapq, uint32_t NM, int n_cigar, uint32_t *cigar, std::string &MD_tag, std::vector<SAMMapping> *mappings_on_diff_ref_seqs) {
  mappings_on_diff_ref_seqs->emplace_back(SAMMapping{read_id, std::string(read_name), cell_barcode, num_dups, position, rid, flag, direction, 0, is_unique, mapq, NM, n_cigar, cigar, MD_tag});
void Chromap<SAMMapping>::EmplaceBackMappingRecord(uint32_t read_id, const char *read_name, uint32_t cell_barcode, uint8_t num_dups, int64_t position, int rid, int flag, uint8_t direction, uint8_t is_unique, uint8_t mapq, uint32_t NM, int n_cigar, uint32_t *cigar, std::string &MD_tag, const char* read, const char* read_qual, std::vector<SAMMapping> *mappings_on_diff_ref_seqs) {
  mappings_on_diff_ref_seqs->emplace_back(SAMMapping{read_id, std::string(read_name), cell_barcode, num_dups, position, rid, flag, direction, 0, is_unique, mapq, NM, n_cigar, cigar, MD_tag, std::string(read), std::string(read_qual)});
}

template <typename MappingRecord>
@@ -2437,7 +2437,7 @@ void Chromap<MappingRecord>::ProcessBestMappingsForSingleEndRead(Direction mappi
		if (*num_best_mappings_reported >= 1) {
			flag |= BAM_FSECONDARY;
		}
		EmplaceBackMappingRecord(read_id, read_name, barcode_key, 1, ref_start_position, rid, flag, 0, is_unique, mapq, NM, n_cigar, cigar, MD_tag, &((*mappings_on_diff_ref_seqs)[rid])); 
		EmplaceBackMappingRecord(read_id, read_name, barcode_key, 1, ref_start_position, rid, flag, 0, is_unique, mapq, NM, n_cigar, cigar, MD_tag, read, read_batch.GetSequenceQualAt(read_index), &((*mappings_on_diff_ref_seqs)[rid])); 
	} else if (output_mapping_in_PAF_) {
        	EmplaceBackMappingRecord(read_id, read_name, read_length, barcode_key, ref_start_position, ref_end_position - ref_start_position + 1, mapq, direction, is_unique, 1, &((*mappings_on_diff_ref_seqs)[rid]));
	} else {
+1 −1
Original line number Diff line number Diff line
@@ -138,7 +138,7 @@ class Chromap {
  void ProcessBestMappingsForSingleEndRead(Direction mapping_direction, uint8_t mapq, int num_candidates, uint32_t repetitive_seed_length, int min_num_errors, int num_best_mappings, int second_min_num_errors, int num_second_best_mappings, const SequenceBatch &read_batch, uint32_t read_index, const SequenceBatch &reference, const SequenceBatch &barcode_batch, const std::vector<int> &best_mapping_indices, const std::vector<std::pair<int, uint64_t> > &mappings, const std::vector<int> &split_sites, int *best_mapping_index, int *num_best_mappings_reported, std::vector<std::vector<MappingRecord> > *mappings_on_diff_ref_seqs);
  void EmplaceBackMappingRecord(uint32_t read_id, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq, uint8_t direction, uint8_t is_unique, uint8_t num_dups, std::vector<MappingRecord> *mappings_on_diff_ref_seqs);
  void EmplaceBackMappingRecord(uint32_t read_id, const char* read_name, uint16_t read_length, uint32_t barcode, uint32_t fragment_start_position, uint16_t fragment_length, uint8_t mapq, uint8_t direction, uint8_t is_unique, uint8_t num_dups, std::vector<MappingRecord> *mappings_on_diff_ref_seqs);
  void EmplaceBackMappingRecord(uint32_t read_id, const char *read_name, uint32_t cell_barcode, uint8_t num_dups, int64_t position, int rid, int flag, uint8_t direction, uint8_t is_unique, uint8_t mapq, uint32_t NM, int n_cigar, uint32_t *cigar, std::string &MD_tag, std::vector<MappingRecord> *mappings_on_diff_ref_seqs);
  void EmplaceBackMappingRecord(uint32_t read_id, const char *read_name, uint32_t cell_barcode, uint8_t num_dups, int64_t position, int rid, int flag, uint8_t direction, uint8_t is_unique, uint8_t mapq, uint32_t NM, int n_cigar, uint32_t *cigar, std::string &MD_tag,  const char* read, const char* read_qual, std::vector<MappingRecord> *mappings_on_diff_ref_seqs);
  void ApplyTn5ShiftOnSingleEndMapping(uint32_t num_reference_sequences, std::vector<std::vector<MappingRecord> > *mappings);
  uint32_t LoadSingleEndReadsWithBarcodes(SequenceBatch *read_batch, SequenceBatch *barcode_batch);

+18 −3
Original line number Diff line number Diff line
@@ -314,6 +314,8 @@ struct SAMMapping {
  int n_cigar;     // number of CIGAR operations
  uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
  std::string MD;
  std::string sequence;
  std::string sequence_qual;
  //char *XA;        // alternative mappings
  //int score, sub, alt_sc;
  bool operator<(const SAMMapping& m) const {
@@ -409,6 +411,10 @@ struct SAMMapping {
    if (MD_length > 0) {
      num_written_bytes += fwrite(MD.data(), sizeof(char), MD_length, temp_mapping_output_file);
    }
    uint16_t sequence_length = sequence.length();
    num_written_bytes += fwrite(&sequence_length, sizeof(uint16_t), 1, temp_mapping_output_file);
    num_written_bytes += fwrite(sequence.data(), sizeof(char), sequence_length, temp_mapping_output_file);
    num_written_bytes += fwrite(sequence_qual.data(), sizeof(char), sequence_length, temp_mapping_output_file);
    return num_written_bytes;
  }
  size_t LoadFromFile(FILE *temp_mapping_output_file) {
@@ -441,6 +447,14 @@ struct SAMMapping {
      MD = std::string(MD_length, '\0');
      num_read_bytes += fread(&(MD[0]), sizeof(char), MD_length, temp_mapping_output_file);
    }
    uint16_t sequence_length = 0;
    num_read_bytes += fread(&sequence_length, sizeof(uint16_t), 1, temp_mapping_output_file);
    if (sequence_length > 0) {
      sequence = std::string(sequence_length, '\0');
      sequence_qual = std::string(sequence_length, '\0');
      num_read_bytes += fread(&(sequence[0]), sizeof(char), sequence_length, temp_mapping_output_file);
      num_read_bytes += fread(&(sequence_qual[0]), sizeof(char), sequence_length, temp_mapping_output_file);
    }
    return num_read_bytes;
  }
};
@@ -1260,11 +1274,12 @@ inline void SAMOutputTools<SAMMapping>::AppendMapping(uint32_t rid, const Sequen
  //std::string strand = (mapping.direction & 1) == 1 ? "+" : "-";
  //uint32_t mapping_end_position = mapping.fragment_start_position + mapping.fragment_length;
  const char *reference_sequence_name = (mapping.flag & BAM_FUNMAP) > 0 ? "*" : reference.GetSequenceNameAt(rid);

  this->AppendMappingOutput(mapping.read_name + "\t" + std::to_string(mapping.flag) + "\t" + std::string(reference_sequence_name) + "\t" + std::to_string(mapping.GetStartPosition()) + "\t" + std::to_string(mapping.mapq) + "\t" + mapping.GenerateCigarString() + "\t*\t" + std::to_string(0) + "\t" + std::to_string(0) + "\t" + mapping.sequence + "\t" + mapping.sequence_qual + "\t" + mapping.GenerateIntTagString("NM", mapping.NM) + "\tMD:Z:" + mapping.MD);
  if (cell_barcode_length_ > 0) {
    this->AppendMappingOutput(mapping.read_name + "\t" + std::to_string(mapping.flag) + "\t" + std::string(reference_sequence_name) + "\t" + std::to_string(mapping.GetStartPosition()) + "\t" + std::to_string(mapping.mapq) + "\t" + mapping.GenerateCigarString() + "\t*\t" + std::to_string(0) + "\t" + std::to_string(0) + "\t*\t*\t" + mapping.GenerateIntTagString("NM", mapping.NM) + "\tMD:Z:" + mapping.MD + "\tCB:Z:" + Seed2Sequence(mapping.cell_barcode, cell_barcode_length_) + "\n");
  } else {
    this->AppendMappingOutput(mapping.read_name + "\t" + std::to_string(mapping.flag) + "\t" + std::string(reference_sequence_name) + "\t" + std::to_string(mapping.GetStartPosition()) + "\t" + std::to_string(mapping.mapq) + "\t" + mapping.GenerateCigarString() + "\t*\t" + std::to_string(0) + "\t" + std::to_string(0) + "\t*\t*\t" + mapping.GenerateIntTagString("NM", mapping.NM) + "\tMD:Z:" + mapping.MD + "\n");
    this->AppendMappingOutput("\tCB:Z:" + Seed2Sequence(mapping.cell_barcode, cell_barcode_length_));
  }
  this->AppendMappingOutput("\n");
}

template <>