libStatGen Software  1
SamRecord Class Reference

Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record. More...

#include <SamRecord.h>

Public Types

enum  SequenceTranslation { NONE , EQUAL , BASES }
 Enum containing the settings on how to translate the sequence if a reference is available. More...
 

Public Member Functions

 SamRecord ()
 Default Constructor.
 
 SamRecord (ErrorHandler::HandlingType errorHandlingType)
 Constructor that sets the error handling type. More...
 
 ~SamRecord ()
 Destructor.
 
void resetRecord ()
 Reset the fields of the record to a default value. More...
 
bool isValid (SamFileHeader &header)
 Returns whether or not the record is valid, setting the status to indicate success or failure. More...
 
void setReference (GenomeSequence *reference)
 Set the reference to the specified genome sequence object. More...
 
void setSequenceTranslation (SequenceTranslation translation)
 Set the type of sequence translation to use when getting the sequence. More...
 
Set Alignment Data

Set methods for record fields.

All of the "set" methods set the status to indicate success or the failure reason.

bool setReadName (const char *readName)
 Set QNAME to the passed in name. More...
 
bool setFlag (uint16_t flag)
 Set the bitwise FLAG to the specified value. More...
 
bool setReferenceName (SamFileHeader &header, const char *referenceName)
 Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id. More...
 
bool set1BasedPosition (int32_t position)
 Set the leftmost position (POS) using the specified 1-based (SAM format) value. More...
 
bool set0BasedPosition (int32_t position)
 Set the leftmost position using the specified 0-based (BAM format) value. More...
 
bool setMapQuality (uint8_t mapQuality)
 Set the mapping quality (MAPQ). More...
 
bool setCigar (const char *cigar)
 Set the CIGAR to the specified SAM formatted cigar string. More...
 
bool setCigar (const Cigar &cigar)
 Set the CIGAR to the specified Cigar object. More...
 
bool setMateReferenceName (SamFileHeader &header, const char *mateReferenceName)
 Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id. More...
 
bool set1BasedMatePosition (int32_t matePosition)
 Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value. More...
 
bool set0BasedMatePosition (int32_t matePosition)
 Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value. More...
 
bool setInsertSize (int32_t insertSize)
 Sets the inferred insert size (ISIZE)/observed template length (TLEN). More...
 
bool setSequence (const char *seq)
 Sets the sequence (SEQ) to the specified SAM formatted sequence string. More...
 
bool setQuality (const char *quality)
 Sets the quality (QUAL) to the specified SAM formatted quality string. More...
 
bool shiftIndelsLeft ()
 Shift the indels (if any) to the left by updating the CIGAR. More...
 
SamStatus::Status setBuffer (const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header)
 Sets the SamRecord to contain the information in the BAM formatted fromBuffer. More...
 
SamStatus::Status setBufferFromFile (IFILE filePtr, SamFileHeader &header)
 Read the BAM record from a file. More...
 
Set Tag Data

Set methods for tags.

bool addIntTag (const char *tag, int32_t value)
 Add the specified integer tag to the record. More...
 
bool addTag (const char *tag, char vtype, const char *value)
 Add the specified tag,vtype,value to the record. More...
 
void clearTags ()
 Clear the tags in this record. More...
 
bool rmTag (const char *tag, char type)
 Remove a tag. More...
 
bool rmTags (const char *tags)
 Remove tags. More...
 
Get Alignment Data

Get methods for record fields.

All of the "get" methods set the status to indicate success or the failure reason.

const void * getRecordBuffer ()
 Get a const pointer to the buffer that contains the BAM representation of the record. More...
 
const void * getRecordBuffer (SequenceTranslation translation)
 Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence. More...
 
SamStatus::Status writeRecordBuffer (IFILE filePtr)
 Write the record as a BAM into the specified already opened file. More...
 
SamStatus::Status writeRecordBuffer (IFILE filePtr, SequenceTranslation translation)
 Write the record as a BAM into the specified already opened file using the specified translation on the sequence. More...
 
int32_t getBlockSize ()
 Get the block size of the record (BAM format). More...
 
const char * getReferenceName ()
 Get the reference sequence name (RNAME) of the record. More...
 
int32_t getReferenceID ()
 Get the reference sequence id of the record (BAM format rid). More...
 
int32_t get1BasedPosition ()
 Get the 1-based(SAM) leftmost position (POS) of the record. More...
 
int32_t get0BasedPosition ()
 Get the 0-based(BAM) leftmost position of the record. More...
 
uint8_t getReadNameLength ()
 Get the length of the readname (QNAME) including the null. More...
 
uint8_t getMapQuality ()
 Get the mapping quality (MAPQ) of the record. More...
 
uint16_t getBin ()
 Get the BAM bin for the record. More...
 
uint16_t getCigarLength ()
 Get the length of the BAM formatted CIGAR. More...
 
uint16_t getFlag ()
 Get the flag (FLAG). More...
 
int32_t getReadLength ()
 Get the length of the read. More...
 
const char * getMateReferenceName ()
 Get the mate/next fragment's reference sequence name (RNEXT). More...
 
const char * getMateReferenceNameOrEqual ()
 Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned. More...
 
int32_t getMateReferenceID ()
 Get the mate reference id of the record (BAM format: mate_rid/next_refID). More...
 
int32_t get1BasedMatePosition ()
 Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT). More...
 
int32_t get0BasedMatePosition ()
 Get the 0-based(BAM) leftmost mate/next fragment's position. More...
 
int32_t getInsertSize ()
 Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN). More...
 
int32_t get0BasedAlignmentEnd ()
 Returns the 0-based inclusive rightmost position of the clipped sequence. More...
 
int32_t get1BasedAlignmentEnd ()
 Returns the 1-based inclusive rightmost position of the clipped sequence. More...
 
int32_t getAlignmentLength ()
 Returns the length of the clipped sequence, returning 0 if the cigar is '*'. More...
 
int32_t get0BasedUnclippedStart ()
 Returns the 0-based inclusive left-most position adjusted for clipped bases. More...
 
int32_t get1BasedUnclippedStart ()
 Returns the 1-based inclusive left-most position adjusted for clipped bases. More...
 
int32_t get0BasedUnclippedEnd ()
 Returns the 0-based inclusive right-most position adjusted for clipped bases. More...
 
int32_t get1BasedUnclippedEnd ()
 Returns the 1-based inclusive right-most position adjusted for clipped bases. More...
 
const char * getReadName ()
 Returns the SAM formatted Read Name (QNAME). More...
 
const char * getCigar ()
 Returns the SAM formatted CIGAR string. More...
 
const char * getSequence ()
 Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation. More...
 
const char * getSequence (SequenceTranslation translation)
 Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation. More...
 
const char * getQuality ()
 Returns the SAM formatted quality string (QUAL). More...
 
char getSequence (int index)
 Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation. More...
 
char getSequence (int index, SequenceTranslation translation)
 Get the sequence base at the specified index into this sequence 0 to readLength - 1 performing the specified sequence translation. More...
 
char getQuality (int index)
 Get the quality character at the specified index into the quality 0 to readLength - 1. More...
 
Cigar * getCigarInfo ()
 Returns a pointer to the Cigar object associated with this record. More...
 
uint32_t getNumOverlaps (int32_t start, int32_t end)
 Return the number of bases in this read that overlap the passed in region. More...
 
bool getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
 Returns the values of all fields except the tags. More...
 
bool getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality, SequenceTranslation translation)
 Returns the values of all fields except the tags using the specified sequence translation. More...
 
GenomeSequence * getReference ()
 Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set). More...
 

Get Tag Methods

Get methods for obtaining information on tags.

uint32_t getTagLength ()
 Returns the length of the BAM formatted tags. More...
 
bool getNextSamTag (char *tag, char &vtype, void **value)
 Get the next tag from the record. More...
 
void resetTagIter ()
 Reset the tag iterator to the beginning of the tags.
 
bool getTagsString (const char *tags, String &returnString, char delim='\t')
 Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... More...
 
const String * getStringTag (const char *tag)
 Get the string value for the specified tag. More...
 
int * getIntegerTag (const char *tag)
 Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure). More...
 
bool getIntegerTag (const char *tag, int &tagVal)
 Get the integer value for the specified tag. More...
 
bool getFloatTag (const char *tag, float &tagVal)
 Get the float value for the specified tag. More...
 
const String & getString (const char *tag)
 Get the string value for the specified tag.
 
int & getInteger (const char *tag)
 Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool.
 
bool checkString (const char *tag)
 Check if the specified tag contains a string. More...
 
bool checkInteger (const char *tag)
 Check if the specified tag contains an integer. More...
 
bool checkFloat (const char *tag)
 Check if the specified tag contains a float. More...
 
bool checkTag (const char *tag, char type)
 Check if the specified tag contains a value of the specified vtype. More...
 
const SamStatus & getStatus ()
 Returns the status associated with the last method that sets the status. More...
 
static bool isIntegerType (char vtype)
 Returns whether or not the specified vtype is an integer type. More...
 
static bool isFloatType (char vtype)
 Returns whether or not the specified vtype is a float type. More...
 
static bool isCharType (char vtype)
 Returns whether or not the specified vtype is a char type. More...
 
static bool isStringType (char vtype)
 Returns whether or not the specified vtype is a string type. More...
 

Detailed Description

Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.


Definition at line 51 of file SamRecord.h.

Member Enumeration Documentation

◆ SequenceTranslation

Enum containing the settings on how to translate the sequence if a reference is available.

If no reference is available, no translation is done.

Enumerator
NONE 

Leave the sequence as is.

EQUAL 

Translate bases that match the reference to '='.

BASES 

Translate '=' to the actual base.

Definition at line 57 of file SamRecord.h.

57  {
58  NONE, ///< Leave the sequence as is.
59  EQUAL, ///< Translate bases that match the reference to '='
60  BASES, ///< Translate '=' to the actual base.
61  };
@ NONE
Leave the sequence as is.
Definition: SamRecord.h:58
@ BASES
Translate '=' to the actual base.
Definition: SamRecord.h:60
@ EQUAL
Translate bases that match the reference to '='.
Definition: SamRecord.h:59

Constructor & Destructor Documentation

◆ SamRecord()

SamRecord::SamRecord ( ErrorHandler::HandlingType  errorHandlingType)

Constructor that sets the error handling type.

Parameters
errorHandlingType: how to handle errors.

Definition at line 53 of file SamRecord.cpp.

54  : myStatus(errorHandlingType),
55  myRefPtr(NULL),
56  mySequenceTranslation(NONE)
57 {
58  int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
59 
60  myRecordPtr =
61  (bamRecordStruct *) malloc(defaultAllocSize);
62 
63  myCigarTempBuffer = NULL;
64  myCigarTempBufferAllocatedSize = 0;
65 
66  allocatedSize = defaultAllocSize;
67 
68  resetRecord();
69 }
void resetRecord()
Reset the fields of the record to a default value.
Definition: SamRecord.cpp:91
Structure of a BAM record.
Definition: SamRecord.h:34

References resetRecord().

Member Function Documentation

◆ addIntTag()

bool SamRecord::addIntTag ( const char *  tag,
int32_t  value 
)

Add the specified integer tag to the record.

Internal processing handles switching between SAM/BAM formats when read/written and determining the type for BAM format. If the tag is already there this code will replace it if the specified value is different.

Parameters
tag: two character tag to be added to the SAM/BAM record.
value: value for the specified tag.
Returns
true if the tag was successfully added, false otherwise.

Definition at line 647 of file SamRecord.cpp.

648 {
649  myStatus = SamStatus::SUCCESS;
650  int key = 0;
651  int index = 0;
652  char bamvtype;
653 
654  int tagBufferSize = 0;
655 
656  // First check to see if the tags need to be synced to the buffer.
657  if(myNeedToSetTagsFromBuffer)
658  {
659  if(!setTagsFromBuffer())
660  {
661  // Failed to read tags from the buffer, so cannot add new ones.
662  return(false);
663  }
664  }
665 
666  // Ints come in as int. But it can be represented in fewer bits.
667  // So determine a more specific type that is in line with the
668  // types for BAM files.
669  // First check to see if it is a negative.
670  if(value < 0)
671  {
672  // The int is negative, so it will need to use a signed type.
673  // See if it is greater than the min value for a char.
674  if(value > ((std::numeric_limits<char>::min)()))
675  {
676  // It can be stored in a signed char.
677  bamvtype = 'c';
678  tagBufferSize += 4;
679  }
680  else if(value > ((std::numeric_limits<short>::min)()))
681  {
682  // It fits in a signed short.
683  bamvtype = 's';
684  tagBufferSize += 5;
685  }
686  else
687  {
688  // Just store it as a signed int.
689  bamvtype = 'i';
690  tagBufferSize += 7;
691  }
692  }
693  else
694  {
695  // It is positive, so an unsigned type can be used.
696  if(value < ((std::numeric_limits<unsigned char>::max)()))
697  {
698  // It is under the max of an unsigned char.
699  bamvtype = 'C';
700  tagBufferSize += 4;
701  }
702  else if(value < ((std::numeric_limits<unsigned short>::max)()))
703  {
704  // It is under the max of an unsigned short.
705  bamvtype = 'S';
706  tagBufferSize += 5;
707  }
708  else
709  {
710  // Just store it as an unsigned int.
711  bamvtype = 'I';
712  tagBufferSize += 7;
713  }
714  }
715 
716  // Check to see if the tag is already there.
717  key = MAKEKEY(tag[0], tag[1], bamvtype);
718  unsigned int hashIndex = extras.Find(key);
719  if(hashIndex != LH_NOTFOUND)
720  {
721  // Tag was already found.
722  index = extras[hashIndex];
723 
724  // Since the tagBufferSize was already updated with the new value,
725  // subtract the size for the previous tag (even if they are the same).
726  switch(intType[index])
727  {
728  case 'c':
729  case 'C':
730  case 'A':
731  tagBufferSize -= 4;
732  break;
733  case 's':
734  case 'S':
735  tagBufferSize -= 5;
736  break;
737  case 'i':
738  case 'I':
739  tagBufferSize -= 7;
740  break;
741  default:
742  myStatus.setStatus(SamStatus::INVALID,
743  "unknown tag inttype type found.\n");
744  return(false);
745  }
746 
747  // Tag already existed, print message about overwriting.
748  // WARN about dropping duplicate tags.
749  if(myNumWarns++ < myMaxWarns)
750  {
751  String newVal;
752  String origVal;
753  appendIntArrayValue(index, origVal);
754  appendIntArrayValue(bamvtype, value, newVal);
755  fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
756  tag[0], tag[1], intType[index], origVal.c_str(), tag[0], tag[1], bamvtype, newVal.c_str());
757  if(myNumWarns == myMaxWarns)
758  {
759  fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
760  }
761  }
762 
763  // Update the integer value and type.
764  integers[index] = value;
765  intType[index] = bamvtype;
766  }
767  else
768  {
769  // Tag is not already there, so add it.
770  index = integers.Length();
771 
772  integers.Push(value);
773  intType.push_back(bamvtype);
774 
775  extras.Add(key, index);
776  }
777 
778  // The buffer tags are now out of sync.
779  myNeedToSetTagsInBuffer = true;
780  myIsTagsBufferValid = false;
781  myIsBufferSynced = false;
782  myTagBufferSize += tagBufferSize;
783 
784  return(true);
785 }
@ SUCCESS
method completed successfully.
Definition: StatGenStatus.h:32
@ INVALID
invalid other than for sorting.
Definition: StatGenStatus.h:44
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.

References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

Referenced by addTag().

◆ addTag()

bool SamRecord::addTag ( const char *  tag,
char  vtype,
const char *  value 
)

Add the specified tag,vtype,value to the record.

Vtype can be SAM/BAM format. Internal processing handles switching between SAM/BAM formats when read/written. If the tag is already there this code will replace it if the specified value is different.

Parameters
tag: two character tag to be added to the SAM/BAM record.
vtype: vtype of the specified value - either SAM/BAM vtypes.
value: value as a string for the specified tag.
Returns
true if the tag was successfully added, false otherwise.

Definition at line 791 of file SamRecord.cpp.

792 {
793  if(vtype == 'i')
794  {
795  // integer type. Call addIntTag to handle it.
796  int intVal = atoi(valuePtr);
797  return(addIntTag(tag, intVal));
798  }
799 
800  // Non-int type.
801  myStatus = SamStatus::SUCCESS;
802  bool status = true; // default to successful.
803  int key = 0;
804  int index = 0;
805 
806  int tagBufferSize = 0;
807 
808  // First check to see if the tags need to be synced to the buffer.
809  if(myNeedToSetTagsFromBuffer)
810  {
811  if(!setTagsFromBuffer())
812  {
813  // Failed to read tags from the buffer, so cannot add new ones.
814  return(false);
815  }
816  }
817 
818  // First check to see if the tag is already there.
819  key = MAKEKEY(tag[0], tag[1], vtype);
820  unsigned int hashIndex = extras.Find(key);
821  if(hashIndex != LH_NOTFOUND)
822  {
823  // The key was found in the hash, so get the lookup index.
824  index = extras[hashIndex];
825 
826  String origTag;
827  char origType = vtype;
828 
829  // Adjust the currently pointed to value to the new setting.
830  switch (vtype)
831  {
832  case 'A' :
833  // First check to see if the value changed.
834  if((integers[index] == (const int)*(valuePtr)) &&
835  (intType[index] == vtype))
836  {
837  // The value & type has not changed, so do nothing.
838  return(true);
839  }
840  else
841  {
842  // Tag buffer size changes if type changes, so subtract & add.
843  origType = intType[index];
844  appendIntArrayValue(index, origTag);
845  tagBufferSize -= getNumericTagTypeSize(intType[index]);
846  tagBufferSize += getNumericTagTypeSize(vtype);
847  integers[index] = (const int)*(valuePtr);
848  intType[index] = vtype;
849  }
850  break;
851  case 'Z' :
852  // First check to see if the value changed.
853  if(strings[index] == valuePtr)
854  {
855  // The value has not changed, so do nothing.
856  return(true);
857  }
858  else
859  {
860  // Adjust the tagBufferSize by removing the size of the old string.
861  origTag = strings[index];
862  tagBufferSize -= strings[index].Length();
863  strings[index] = valuePtr;
864  // Adjust the tagBufferSize by adding the size of the new string.
865  tagBufferSize += strings[index].Length();
866  }
867  break;
868  case 'B' :
869  // First check to see if the value changed.
870  if(strings[index] == valuePtr)
871  {
872  // The value has not changed, so do nothing.
873  return(true);
874  }
875  else
876  {
877  // Adjust the tagBufferSize by removing the size of the old field.
878  origTag = strings[index];
879  tagBufferSize -= getBtagBufferSize(strings[index]);
880  strings[index] = valuePtr;
881  // Adjust the tagBufferSize by adding the size of the new field.
882  tagBufferSize += getBtagBufferSize(strings[index]);
883  }
884  break;
885  case 'f' :
886  // First check to see if the value changed.
887  if(floats[index] == (float)atof(valuePtr))
888  {
889  // The value has not changed, so do nothing.
890  return(true);
891  }
892  else
893  {
894  // Tag buffer size doesn't change between different 'f' entries.
895  origTag.appendFullFloat(floats[index]);
896  floats[index] = (float)atof(valuePtr);
897  }
898  break;
899  default :
900  fprintf(stderr,
901  "samRecord::addTag() - Unknown custom field of type %c\n",
902  vtype);
903  myStatus.setStatus(SamStatus::FAIL_PARSE,
904  "Unknown custom field in a tag");
905  status = false;
906  break;
907  }
908 
909  // Duplicate tag in this record.
910  // Tag already existed, print message about overwriting.
911  // WARN about dropping duplicate tags.
912  if(myNumWarns++ < myMaxWarns)
913  {
914  fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
915  tag[0], tag[1], origType, origTag.c_str(), tag[0], tag[1], vtype, valuePtr);
916  if(myNumWarns == myMaxWarns)
917  {
918  fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
919  }
920  }
921  }
922  else
923  {
924  // The key was not found in the hash, so add it.
925  switch (vtype)
926  {
927  case 'A' :
928  index = integers.Length();
929  integers.Push((const int)*(valuePtr));
930  intType.push_back(vtype);
931  tagBufferSize += 4;
932  break;
933  case 'Z' :
934  index = strings.Length();
935  strings.Push(valuePtr);
936  tagBufferSize += 4 + strings.Last().Length();
937  break;
938  case 'B' :
939  index = strings.Length();
940  strings.Push(valuePtr);
941  tagBufferSize += 3 + getBtagBufferSize(strings[index]);
942  break;
943  case 'f' :
944  index = floats.size();
945  floats.push_back((float)atof(valuePtr));
946  tagBufferSize += 7;
947  break;
948  default :
949  fprintf(stderr,
950  "samRecord::addTag() - Unknown custom field of type %c\n",
951  vtype);
952  myStatus.setStatus(SamStatus::FAIL_PARSE,
953  "Unknown custom field in a tag");
954  status = false;
955  break;
956  }
957  if(status)
958  {
959  // If successful, add the key to extras.
960  extras.Add(key, index);
961  }
962  }
963 
964  // Only add the tag if it has so far been successfully processed.
965  if(status)
966  {
967  // The buffer tags are now out of sync.
968  myNeedToSetTagsInBuffer = true;
969  myIsTagsBufferValid = false;
970  myIsBufferSynced = false;
971  myTagBufferSize += tagBufferSize;
972  }
973  return(status);
974 }
bool addIntTag(const char *tag, int32_t value)
Add the specified integer tag to the record.
Definition: SamRecord.cpp:647
@ FAIL_PARSE
failed to parse a record/header - invalid format.
Definition: StatGenStatus.h:42

References addIntTag(), StatGenStatus::FAIL_PARSE, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ checkFloat()

bool SamRecord::checkFloat ( const char *  tag)
inline

Check if the specified tag contains a float.

Does not set SamStatus.

Parameters
tag: SAM tag to check contents of.
Returns
true if the value associated with the tag is a float.

Definition at line 613 of file SamRecord.h.

613 { return checkTag(tag, 'f'); }
bool checkTag(const char *tag, char type)
Check if the specified tag contains a value of the specified vtype.
Definition: SamRecord.cpp:2381

References checkTag().

◆ checkInteger()

bool SamRecord::checkInteger ( const char *  tag)
inline

Check if the specified tag contains an integer.

Does not set SamStatus.

Parameters
tag: SAM tag to check contents of.
Returns
true if the value associated with the tag is an integer.

Definition at line 607 of file SamRecord.h.

607 { return checkTag(tag, 'i'); }

References checkTag().

◆ checkString()

bool SamRecord::checkString ( const char *  tag)
inline

Check if the specified tag contains a string.

Does not set SamStatus.

Parameters
tagSAM tag to check contents of.
Returns
true if the value associated with the tag is a string.

Definition at line 600 of file SamRecord.h.

601  { return(checkTag(tag, 'Z') || checkTag(tag, 'B')); }

References checkTag().

◆ checkTag()

bool SamRecord::checkTag ( const char *  tag,
char  type 
)

Check if the specified tag contains a value of the specified vtype.

Does not set SamStatus.

Parameters
tag: SAM tag to check contents of.
type: value type to check if the SAM tag matches.
Returns
true if the record contains the specified tag with a value of the specified type.

Definition at line 2381 of file SamRecord.cpp.

2382 {
2383  // Init to success.
2384  myStatus = SamStatus::SUCCESS;
2385  // Parse the buffer if necessary.
2386  if(myNeedToSetTagsFromBuffer)
2387  {
2388  if(!setTagsFromBuffer())
2389  {
2390  // Failed to read the tags from the buffer, so cannot
2391  // get tags. setTagsFromBuffer set the error.
2392  return("");
2393  }
2394  }
2395 
2396  int key = MAKEKEY(tag[0], tag[1], type);
2397 
2398  return (extras.Find(key) != LH_NOTFOUND);
2399 }

References StatGenStatus::SUCCESS.

Referenced by checkFloat(), checkInteger(), and checkString().

◆ clearTags()

void SamRecord::clearTags ( )

Clear the tags in this record.

Does not set SamStatus.

Definition at line 977 of file SamRecord.cpp.

978 {
979  if(extras.Entries() != 0)
980  {
981  extras.Clear();
982  }
983  strings.Clear();
984  integers.Clear();
985  intType.clear();
986  floats.clear();
987  myTagBufferSize = 0;
988  resetTagIter();
989 }
void resetTagIter()
Reset the tag iterator to the beginning of the tags.
Definition: SamRecord.cpp:2034

References resetTagIter().

Referenced by resetRecord().

◆ get0BasedAlignmentEnd()

int32_t SamRecord::get0BasedAlignmentEnd ( )

Returns the 0-based inclusive rightmost position of the clipped sequence.

Returns
0-based inclusive rightmost position

Definition at line 1467 of file SamRecord.cpp.

1468 {
1469  myStatus = SamStatus::SUCCESS;
1470  if(myAlignmentLength == -1)
1471  {
1472  // Alignment end has not been set, so calculate it.
1473  parseCigar();
1474  }
1475  // If alignment length > 0, subtract 1 from it to get the end.
1476  if(myAlignmentLength == 0)
1477  {
1478  // Length is 0, just return the start position.
1479  return(myRecordPtr->myPosition);
1480  }
1481  return(myRecordPtr->myPosition + myAlignmentLength - 1);
1482 }

References StatGenStatus::SUCCESS.

Referenced by get0BasedUnclippedEnd(), get1BasedAlignmentEnd(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and CigarHelper::softClipEndByRefPos().

◆ get0BasedMatePosition()

int32_t SamRecord::get0BasedMatePosition ( )

Get the 0-based(BAM) leftmost mate/next fragment's position.

Returns
0-based leftmost position.

Definition at line 1452 of file SamRecord.cpp.

1453 {
1454  myStatus = SamStatus::SUCCESS;
1455  return myRecordPtr->myMatePosition;
1456 }

References StatGenStatus::SUCCESS.

◆ get0BasedPosition()

int32_t SamRecord::get0BasedPosition ( )

Get the 0-based(BAM) leftmost position of the record.

◆ get0BasedUnclippedEnd()

int32_t SamRecord::get0BasedUnclippedEnd ( )

Returns the 0-based inclusive right-most position adjusted for clipped bases.

Returns
0-based inclusive rightmost position including clips.

Definition at line 1526 of file SamRecord.cpp.

1527 {
1528  // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the
1529  // cigar has not yet been parsed, so no need to check it here.
1530  return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
1531 }
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1467

References get0BasedAlignmentEnd().

Referenced by get1BasedUnclippedEnd().

◆ get0BasedUnclippedStart()

int32_t SamRecord::get0BasedUnclippedStart ( )

Returns the 0-based inclusive left-most position adjusted for clipped bases.

Returns
0-based inclusive leftmost position including clips.

Definition at line 1506 of file SamRecord.cpp.

1507 {
1508  myStatus = SamStatus::SUCCESS;
1509  if(myUnclippedStartOffset == -1)
1510  {
1511  // Unclipped has not yet been calculated, so parse the cigar to get it
1512  parseCigar();
1513  }
1514  return(myRecordPtr->myPosition - myUnclippedStartOffset);
1515 }

References StatGenStatus::SUCCESS.

Referenced by get1BasedUnclippedStart().

◆ get1BasedAlignmentEnd()

int32_t SamRecord::get1BasedAlignmentEnd ( )

Returns the 1-based inclusive rightmost position of the clipped sequence.

Returns
1-based inclusive rightmost position

Definition at line 1486 of file SamRecord.cpp.

1487 {
1488  return(get0BasedAlignmentEnd() + 1);
1489 }

References get0BasedAlignmentEnd().

Referenced by getBin().

◆ get1BasedMatePosition()

int32_t SamRecord::get1BasedMatePosition ( )

Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).

Returns
1-based leftmost position.

Definition at line 1445 of file SamRecord.cpp.

1446 {
1447  myStatus = SamStatus::SUCCESS;
1448  return (myRecordPtr->myMatePosition + 1);
1449 }

References StatGenStatus::SUCCESS.

◆ get1BasedPosition()

int32_t SamRecord::get1BasedPosition ( )

Get the 1-based(SAM) leftmost position (POS) of the record.

Returns
1-based leftmost position.

Definition at line 1312 of file SamRecord.cpp.

1313 {
1314  myStatus = SamStatus::SUCCESS;
1315  return (myRecordPtr->myPosition + 1);
1316 }

References StatGenStatus::SUCCESS.

Referenced by SamValidator::isValid().

◆ get1BasedUnclippedEnd()

int32_t SamRecord::get1BasedUnclippedEnd ( )

Returns the 1-based inclusive right-most position adjusted for clipped bases.

Returns
1-based inclusive rightmost position including clips.

Definition at line 1535 of file SamRecord.cpp.

1536 {
1537  return(get0BasedUnclippedEnd() + 1);
1538 }
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1526

References get0BasedUnclippedEnd().

◆ get1BasedUnclippedStart()

int32_t SamRecord::get1BasedUnclippedStart ( )

Returns the 1-based inclusive left-most position adjusted for clipped bases.

Returns
1-based inclusive leftmost position including clips.

Definition at line 1519 of file SamRecord.cpp.

1520 {
1521  return(get0BasedUnclippedStart() + 1);
1522 }
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1506

References get0BasedUnclippedStart().

◆ getAlignmentLength()

int32_t SamRecord::getAlignmentLength ( )

Returns the length of the clipped sequence, returning 0 if the cigar is '*'.

Returns
length of the clipped sequence.

Definition at line 1493 of file SamRecord.cpp.

1494 {
1495  myStatus = SamStatus::SUCCESS;
1496  if(myAlignmentLength == -1)
1497  {
1498  // Alignment end has not been set, so calculate it.
1499  parseCigar();
1500  }
1501  // Return the alignment length.
1502  return(myAlignmentLength);
1503 }

References StatGenStatus::SUCCESS.

◆ getBin()

uint16_t SamRecord::getBin ( )

Get the BAM bin for the record.

Returns
BAM bin

Definition at line 1347 of file SamRecord.cpp.

1348 {
1349  myStatus = SamStatus::SUCCESS;
1350  if(!myIsBinValid)
1351  {
1352  // The bin that is set in the record is not valid, so
1353  // reset it.
1354  myRecordPtr->myBin =
1355  bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
1356  myIsBinValid = true;
1357  }
1358  return(myRecordPtr->myBin);
1359 }
int32_t get1BasedAlignmentEnd()
Returns the 1-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1486

References get1BasedAlignmentEnd(), and StatGenStatus::SUCCESS.

◆ getBlockSize()

int32_t SamRecord::getBlockSize ( )

Get the block size of the record (BAM format).

Returns
BAM block size of the record.

Definition at line 1281 of file SamRecord.cpp.

1282 {
1283  myStatus = SamStatus::SUCCESS;
1284  // If the buffer isn't synced, sync the buffer to determine the
1285  // block size.
1286  if(myIsBufferSynced == false)
1287  {
1288  // Since this just returns the block size, the translation of
1289  // the sequence does not matter, so just use the currently set
1290  // value.
1291  fixBuffer(myBufferSequenceTranslation);
1292  }
1293  return myRecordPtr->myBlockSize;
1294 }

References StatGenStatus::SUCCESS.

◆ getCigar()

const char * SamRecord::getCigar ( )

Returns the SAM formatted CIGAR string.

Returns
cigar string.

Definition at line 1555 of file SamRecord.cpp.

1556 {
1557  myStatus = SamStatus::SUCCESS;
1558  if(myCigar.Length() == 0)
1559  {
1560  // 0 Length, means that it is in the buffer, but has not yet
1561  // been synced to the string, so do the sync.
1562  parseCigarBinary();
1563  }
1564  return myCigar.c_str();
1565 }

References StatGenStatus::SUCCESS.

Referenced by getFields(), SamValidator::isValidCigar(), CigarHelper::softClipBeginByRefPos(), and CigarHelper::softClipEndByRefPos().

◆ getCigarInfo()

Cigar * SamRecord::getCigarInfo ( )

Returns a pointer to the Cigar object associated with this record.


The object is essentially read-only, only allowing modifications due to lazy evaluations.

Returns
pointer to the Cigar object.

Definition at line 1836 of file SamRecord.cpp.

1837 {
1838  // Check to see whether or not the Cigar has already been
1839  // set - this is determined by checking if alignment length
1840  // is set since alignment length and the cigar are set
1841  // at the same time.
1842  if(myAlignmentLength == -1)
1843  {
1844  // Not been set, so calculate it.
1845  parseCigar();
1846  }
1847  return(&myCigarRoller);
1848 }

Referenced by PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getSequence(), SamQuerySeqWithRefIter::reset(), SamFilter::softClip(), CigarHelper::softClipBeginByRefPos(), and CigarHelper::softClipEndByRefPos().

◆ getCigarLength()

uint16_t SamRecord::getCigarLength ( )

Get the length of the BAM formatted CIGAR.

Returns
length of BAM formatted cigar.

Definition at line 1362 of file SamRecord.cpp.

1363 {
1364  myStatus = SamStatus::SUCCESS;
1365  // If the cigar buffer is valid
1366  // then get the length from there.
1367  if(myIsCigarBufferValid)
1368  {
1369  return myRecordPtr->myCigarLength;
1370  }
1371 
1372  if(myCigarTempBufferLength == -1)
1373  {
1374  // The cigar buffer is not valid and the cigar temp buffer is not set,
1375  // so parse the string.
1376  parseCigarString();
1377  }
1378 
1379  // The temp buffer is now set, so return the size.
1380  return(myCigarTempBufferLength);
1381 }

References StatGenStatus::SUCCESS.

◆ getFields() [1/2]

bool SamRecord::getFields ( bamRecordStruct recStruct,
String readName,
String cigar,
String sequence,
String quality 
)

Returns the values of all fields except the tags.

Parameters
recStruct: structure containing the contents of all non-variable length fields.
readName: read name from the record (return param)
cigar: cigar string from the record (return param)
sequence: sequence string from the record (return param)
quality: quality string from the record (return param)
Returns
true if all fields were successfully set, false otherwise.

Definition at line 1866 of file SamRecord.cpp.

1868 {
1869  return(getFields(recStruct, readName, cigar, sequence, quality,
1870  mySequenceTranslation));
1871 }
bool getFields(bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
Returns the values of all fields except the tags.
Definition: SamRecord.cpp:1866

◆ getFields() [2/2]

bool SamRecord::getFields ( bamRecordStruct recStruct,
String readName,
String cigar,
String sequence,
String quality,
SequenceTranslation  translation 
)

Returns the values of all fields except the tags using the specified sequence translation.

Parameters
recStruct: structure containing the contents of all non-variable length fields.
readName: read name from the record (return param)
cigar: cigar string from the record (return param)
sequence: sequence string from the record (return param)
quality: quality string from the record (return param)
translation: type of sequence translation to use.
Returns
true if all fields were successfully set, false otherwise.

Definition at line 1875 of file SamRecord.cpp.

1878 {
1879  myStatus = SamStatus::SUCCESS;
1880  if(myIsBufferSynced == false)
1881  {
1882  if(!fixBuffer(translation))
1883  {
1884  // failed to set the buffer, return false.
1885  return(false);
1886  }
1887  }
1888  memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
1889 
1890  readName = getReadName();
1891  // Check the status.
1892  if(myStatus != SamStatus::SUCCESS)
1893  {
1894  // Failed to set the fields, return false.
1895  return(false);
1896  }
1897  cigar = getCigar();
1898  // Check the status.
1899  if(myStatus != SamStatus::SUCCESS)
1900  {
1901  // Failed to set the fields, return false.
1902  return(false);
1903  }
1904  sequence = getSequence(translation);
1905  // Check the status.
1906  if(myStatus != SamStatus::SUCCESS)
1907  {
1908  // Failed to set the fields, return false.
1909  return(false);
1910  }
1911  quality = getQuality();
1912  // Check the status.
1913  if(myStatus != SamStatus::SUCCESS)
1914  {
1915  // Failed to set the fields, return false.
1916  return(false);
1917  }
1918  return(true);
1919 }
const char * getCigar()
Returns the SAM formatted CIGAR string.
Definition: SamRecord.cpp:1555
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Definition: SamRecord.cpp:1542
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
Definition: SamRecord.cpp:1638
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation.
Definition: SamRecord.cpp:1568

References getCigar(), getQuality(), getReadName(), getSequence(), and StatGenStatus::SUCCESS.

◆ getFlag()

uint16_t SamRecord::getFlag ( )

Get the flag (FLAG).

Returns
flag.

Definition at line 1384 of file SamRecord.cpp.

1385 {
1386  myStatus = SamStatus::SUCCESS;
1387  return myRecordPtr->myFlag;
1388 }

References StatGenStatus::SUCCESS.

Referenced by SamFilter::filterRead(), SamQuerySeqWithRefIter::getNextMatchMismatch(), SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processFile(), and SamFile::ReadRecord().

◆ getFloatTag()

bool SamRecord::getFloatTag ( const char *  tag,
float &  tagVal 
)

Get the float value for the specified tag.

Parameters
tag: tag to retrieve
tagVal: return parameter with float value for the tag
Returns
true if the float tag was found and tagVal was set, false if not.

Definition at line 2281 of file SamRecord.cpp.

2282 {
2283  // Init to success.
2284  myStatus = SamStatus::SUCCESS;
2285  // Parse the buffer if necessary.
2286  if(myNeedToSetTagsFromBuffer)
2287  {
2288  if(!setTagsFromBuffer())
2289  {
2290  // Failed to read the tags from the buffer, so cannot
2291  // get tags. setTagsFromBuffer set the errors,
2292  // so just return false.
2293  return(false);
2294  }
2295  }
2296 
2297  int key = MAKEKEY(tag[0], tag[1], 'f');
2298  int offset = extras.Find(key);
2299 
2300  int value;
2301  if (offset < 0)
2302  {
2303  // Failed to find the tag.
2304  return(false);
2305  }
2306  else
2307  value = extras[offset];
2308 
2309  tagVal = floats[value];
2310  return(true);
2311 }

References StatGenStatus::SUCCESS.

◆ getInsertSize()

int32_t SamRecord::getInsertSize ( )

Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).

Returns
inferred insert size or observed template length.

Definition at line 1459 of file SamRecord.cpp.

1460 {
1461  myStatus = SamStatus::SUCCESS;
1462  return myRecordPtr->myInsertSize;
1463 }

References StatGenStatus::SUCCESS.

◆ getIntegerTag() [1/2]

int * SamRecord::getIntegerTag ( const char *  tag)

Get the integer value for the specified tag. DEPRECATED: use the overload that returns a bool (success/failure) instead.

Parameters
tag: tag to retrieve
Returns
pointer to the tag's integer value if found, NULL if not found.

Definition at line 2216 of file SamRecord.cpp.

2217 {
2218  // Init to success.
2219  myStatus = SamStatus::SUCCESS;
2220  // Parse the buffer if necessary.
2221  if(myNeedToSetTagsFromBuffer)
2222  {
2223  if(!setTagsFromBuffer())
2224  {
2225  // Failed to read the tags from the buffer, so cannot
2226  // get tags. setTagsFromBuffer set the errors,
2227  // so just return NULL.
2228  return(NULL);
2229  }
2230  }
2231 
2232  int key = MAKEKEY(tag[0], tag[1], 'i');
2233  int offset = extras.Find(key);
2234 
2235  int value;
2236  if (offset < 0)
2237  {
2238  // Failed to find the tag.
2239  return(NULL);
2240  }
2241  else
2242  value = extras[offset];
2243 
2244  return(&(integers[value]));
2245 }

References StatGenStatus::SUCCESS.

◆ getIntegerTag() [2/2]

bool SamRecord::getIntegerTag ( const char *  tag,
int &  tagVal 
)

Get the integer value for the specified tag.

Parameters
tag: tag to retrieve
tagVal: return parameter with integer value for the tag
Returns
true if the integer tag was found and tagVal was set, false if not.

Definition at line 2248 of file SamRecord.cpp.

2249 {
2250  // Init to success.
2251  myStatus = SamStatus::SUCCESS;
2252  // Parse the buffer if necessary.
2253  if(myNeedToSetTagsFromBuffer)
2254  {
2255  if(!setTagsFromBuffer())
2256  {
2257  // Failed to read the tags from the buffer, so cannot
2258  // get tags. setTagsFromBuffer set the errors,
2259  // so just return false.
2260  return(false);
2261  }
2262  }
2263 
2264  int key = MAKEKEY(tag[0], tag[1], 'i');
2265  int offset = extras.Find(key);
2266 
2267  int value;
2268  if (offset < 0)
2269  {
2270  // Failed to find the tag.
2271  return(false);
2272  }
2273  else
2274  value = extras[offset];
2275 
2276  tagVal = integers[value];
2277  return(true);
2278 }

References StatGenStatus::SUCCESS.

◆ getMapQuality()

uint8_t SamRecord::getMapQuality ( )

Get the mapping quality (MAPQ) of the record.

Returns
map quality.

Definition at line 1340 of file SamRecord.cpp.

1341 {
1342  myStatus = SamStatus::SUCCESS;
1343  return myRecordPtr->myMapQuality;
1344 }

References StatGenStatus::SUCCESS.

Referenced by SamValidator::isValid().

◆ getMateReferenceID()

int32_t SamRecord::getMateReferenceID ( )

Get the mate reference id of the record (BAM format: mate_rid/next_refID).

Returns
reference id

Definition at line 1438 of file SamRecord.cpp.

1439 {
1440  myStatus = SamStatus::SUCCESS;
1441  return myRecordPtr->myMateReferenceID;
1442 }

References StatGenStatus::SUCCESS.

◆ getMateReferenceName()

const char * SamRecord::getMateReferenceName ( )

Get the mate/next fragment's reference sequence name (RNEXT).

If it is equal to the reference name, it still returns the reference name.

Returns
reference sequence name

Definition at line 1410 of file SamRecord.cpp.

1411 {
1412  myStatus = SamStatus::SUCCESS;
1413  return myMateReferenceName.c_str();
1414 }

References StatGenStatus::SUCCESS.

◆ getMateReferenceNameOrEqual()

const char * SamRecord::getMateReferenceNameOrEqual ( )

Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned.

Returns
reference sequence name or '='

Definition at line 1420 of file SamRecord.cpp.

1421 {
1422  myStatus = SamStatus::SUCCESS;
1423  if(myMateReferenceName == "*")
1424  {
1425  return(myMateReferenceName);
1426  }
1427  if(myMateReferenceName == getReferenceName())
1428  {
1429  return(FIELD_ABSENT_STRING);
1430  }
1431  else
1432  {
1433  return(myMateReferenceName);
1434  }
1435 }
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
Definition: SamRecord.cpp:1298

References getReferenceName(), and StatGenStatus::SUCCESS.

◆ getNextSamTag()

bool SamRecord::getNextSamTag ( char *  tag,
char &  vtype,
void **  value 
)

Get the next tag from the record.

Sets the Status to SUCCESS when a tag is successfully returned or when there are no more tags. Otherwise the status is set to describe why it failed (parsing, etc).

Parameters
tag: set to the tag when a tag is read.
vtype: set to the vtype when a tag is read.
value: pointer to the value of the tag (will need to cast to int, float, char, or string based on vtype).
Returns
true if a tag was read, false if there are no more tags.

Definition at line 1962 of file SamRecord.cpp.

1963 {
1964  myStatus = SamStatus::SUCCESS;
1965  if(myNeedToSetTagsFromBuffer)
1966  {
1967  if(!setTagsFromBuffer())
1968  {
1969  // Failed to read the tags from the buffer, so cannot
1970  // get tags.
1971  return(false);
1972  }
1973  }
1974 
1975  // Increment the tag index to start looking at the next tag.
1976  // At the beginning, it is set to -1.
1977  myLastTagIndex++;
1978  int maxTagIndex = extras.Capacity();
1979  if(myLastTagIndex >= maxTagIndex)
1980  {
1981  // Hit the end of the tags, return false, no more tags.
1982  // Status is still success since this is not an error,
1983  // it is just the end of the list.
1984  return(false);
1985  }
1986 
1987  bool tagFound = false;
1988  // Loop until a tag is found or the end of extras is hit.
1989  while((tagFound == false) && (myLastTagIndex < maxTagIndex))
1990  {
1991  if(extras.SlotInUse(myLastTagIndex))
1992  {
1993  // Found a slot to use.
1994  int key = extras.GetKey(myLastTagIndex);
1995  getTag(key, tag);
1996  getTypeFromKey(key, vtype);
1997  tagFound = true;
1998  // Get the value associated with the key based on the vtype.
1999  switch (vtype)
2000  {
2001  case 'f' :
2002  *value = getFloatPtr(myLastTagIndex);
2003  break;
2004  case 'i' :
2005  *value = getIntegerPtr(myLastTagIndex, vtype);
2006  if(vtype != 'A')
2007  {
2008  // Convert all int types to 'i'
2009  vtype = 'i';
2010  }
2011  break;
2012  case 'Z' :
2013  case 'B' :
2014  *value = getStringPtr(myLastTagIndex);
2015  break;
2016  default:
2017  myStatus.setStatus(SamStatus::FAIL_PARSE,
2018  "Unknown tag type");
2019  tagFound = false;
2020  break;
2021  }
2022  }
2023  if(!tagFound)
2024  {
2025  // Increment the index since a tag was not found.
2026  myLastTagIndex++;
2027  }
2028  }
2029  return(tagFound);
2030 }

References StatGenStatus::FAIL_PARSE, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

Referenced by SamRecordHelper::genSamTagsString().

◆ getNumOverlaps()

uint32_t SamRecord::getNumOverlaps ( int32_t  start,
int32_t  end 
)

Return the number of bases in this read that overlap the passed in region.

Matches & mismatches between the read and the reference are counted as overlaps, but insertions, deletions, skips, clips, and pads are not counted.

Parameters
start: inclusive 0-based start position (reference position) of the region to check for overlaps in. (-1 indicates to start at the beginning of the reference.)
end: exclusive 0-based end position (reference position) of the region to check for overlaps in. (-1 indicates to go to the end of the reference.)
Returns
number of overlapping bases

Definition at line 1853 of file SamRecord.cpp.

1854 {
1855  // Determine whether or not the cigar has been parsed, which sets up
1856  // the cigar roller. This is determined by checking the alignment length.
1857  if(myAlignmentLength == -1)
1858  {
1859  parseCigar();
1860  }
1861  return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
1862 }
uint32_t getNumOverlaps(int32_t start, int32_t end, int32_t queryStartPos)
Return the number of bases that overlap the reference and the read associated with this cigar that fa...
Definition: Cigar.cpp:334
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
Definition: SamRecord.cpp:1319

References get0BasedPosition(), and Cigar::getNumOverlaps().

Referenced by SamFile::GetNumOverlaps().

◆ getQuality() [1/2]

const char * SamRecord::getQuality ( )

Returns the SAM formatted quality string (QUAL).

Returns
quality string.

Definition at line 1638 of file SamRecord.cpp.

1639 {
1640  myStatus = SamStatus::SUCCESS;
1641  if(myQuality.Length() == 0)
1642  {
1643  // 0 Length, means that it is in the buffer, but has not yet
1644  // been synced to the string, so do the sync.
1645  setSequenceAndQualityFromBuffer();
1646  }
1647  return myQuality.c_str();
1648 }

References StatGenStatus::SUCCESS.

Referenced by PileupElementBaseQual::addEntry(), getFields(), SamValidator::isValidQuality(), and SamFilter::sumMismatchQuality().

◆ getQuality() [2/2]

char SamRecord::getQuality ( int  index)

Get the quality character at the specified index (0 to readLength - 1) into the quality string.

Throws an exception if index is out of range.

Parameters
index: index into the quality string (0 to readLength-1).
Returns
the quality character at the specified index into the quality.

Definition at line 1782 of file SamRecord.cpp.

1783 {
1784  // Determine the read length.
1785  int32_t readLen = getReadLength();
1786 
1787  // If the read length is 0, return ' ' whose ascii code is below
1788  // the minimum ascii code for qualities.
1789  if(readLen == 0)
1790  {
1791  return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
1792  }
1793  else if((index < 0) || (index >= readLen))
1794  {
1795  // Only get here if the index was out of range, so thow an exception.
1796  String exceptionString = "SamRecord::getQuality(";
1797  exceptionString += index;
1798  exceptionString += ") is out of range. Index must be between 0 and ";
1799  exceptionString += (readLen - 1);
1800  throw std::runtime_error(exceptionString.c_str());
1801  }
1802 
1803  if(myQuality.Length() == 0)
1804  {
1805  // Parse BAM Quality.
1806  // Know that myPackedQuality is correct since readLen != 0.
1807  return(myPackedQuality[index] + 33);
1808  }
1809  else
1810  {
1811  // Already have string.
1812  if((myQuality.Length() == 1) && (myQuality[0] == '*'))
1813  {
1814  // Return the unknown quality character.
1815  return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
1816  }
1817  else if(index >= myQuality.Length())
1818  {
1819  // Only get here if the index was out of range, so thow an exception.
1820  // Technically the myQuality string is not guaranteed to be the same length
1821  // as the sequence, so this catches that error.
1822  String exceptionString = "SamRecord::getQuality(";
1823  exceptionString += index;
1824  exceptionString += ") is out of range. Index must be between 0 and ";
1825  exceptionString += (myQuality.Length() - 1);
1826  throw std::runtime_error(exceptionString.c_str());
1827  }
1828  else
1829  {
1830  return(myQuality[index]);
1831  }
1832  }
1833 }
static const char UNKNOWN_QUALITY_CHAR
Character used when the quality is unknown.
Definition: BaseUtilities.h:49
int32_t getReadLength()
Get the length of the read.
Definition: SamRecord.cpp:1391

References getReadLength(), and BaseUtilities::UNKNOWN_QUALITY_CHAR.

◆ getReadLength()

int32_t SamRecord::getReadLength ( )

Get the length of the read.

Returns
read length.

Definition at line 1391 of file SamRecord.cpp.

1392 {
1393  myStatus = SamStatus::SUCCESS;
1394  if(myIsSequenceBufferValid == false)
1395  {
1396  // If the sequence is "*", then return 0.
1397  if((mySequence.Length() == 1) && (mySequence[0] == '*'))
1398  {
1399  return(0);
1400  }
1401  // Do not add 1 since it is not null terminated.
1402  return(mySequence.Length());
1403  }
1404  return(myRecordPtr->myReadLength);
1405 }

References StatGenStatus::SUCCESS.

Referenced by SamFilter::clipOnMismatchThreshold(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getQuality(), getSequence(), SamValidator::isValidCigar(), SamValidator::isValidQuality(), SamQuerySeqWithRefIter::reset(), and CigarHelper::softClipEndByRefPos().

◆ getReadName()

const char * SamRecord::getReadName ( )

Returns the SAM formatted Read Name (QNAME).

Returns
read name.

Definition at line 1542 of file SamRecord.cpp.

1543 {
1544  myStatus = SamStatus::SUCCESS;
1545  if(myReadName.Length() == 0)
1546  {
1547  // 0 Length, means that it is in the buffer, but has not yet
1548  // been synced to the string, so do the sync.
1549  myReadName = (char*)&(myRecordPtr->myData);
1550  }
1551  return myReadName.c_str();
1552 }

References StatGenStatus::SUCCESS.

Referenced by getFields(), SamValidator::isValid(), and SamFile::validateSortOrder().

◆ getReadNameLength()

uint8_t SamRecord::getReadNameLength ( )

Get the length of the readname (QNAME) including the null.

Returns
length of the read name (including null).

Definition at line 1326 of file SamRecord.cpp.

1327 {
1328  myStatus = SamStatus::SUCCESS;
1329  // If the buffer is valid, return the size from there, otherwise get the
1330  // size from the string length + 1 (ending null).
1331  if(myIsReadNameBufferValid)
1332  {
1333  return(myRecordPtr->myReadNameLength);
1334  }
1335 
1336  return(myReadName.Length() + 1);
1337 }

References StatGenStatus::SUCCESS.

Referenced by SamValidator::isValid().

◆ getRecordBuffer() [1/2]

const void * SamRecord::getRecordBuffer ( )

Get a const pointer to the buffer that contains the BAM representation of the record.

Returns
const pointer to the buffer that contains the BAM representation of the record.

Definition at line 1204 of file SamRecord.cpp.

1205 {
1206  return(getRecordBuffer(mySequenceTranslation));
1207 }
const void * getRecordBuffer()
Get a const pointer to the buffer that contains the BAM representation of the record.
Definition: SamRecord.cpp:1204

◆ getRecordBuffer() [2/2]

const void * SamRecord::getRecordBuffer ( SequenceTranslation  translation)

Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence.

Parameters
translation: type of sequence translation to use.
Returns
const pointer to the buffer that contains the BAM representation of the record.

Definition at line 1211 of file SamRecord.cpp.

1212 {
1213  myStatus = SamStatus::SUCCESS;
1214  bool status = true;
1215  // If the buffer is not synced or the sequence in the buffer is not
1216  // properly translated, fix the buffer.
1217  if((myIsBufferSynced == false) ||
1218  (myBufferSequenceTranslation != translation))
1219  {
1220  status &= fixBuffer(translation);
1221  }
1222  // If the buffer is synced, check to see if the tags need to be synced.
1223  if(myNeedToSetTagsInBuffer)
1224  {
1225  status &= setTagsInBuffer();
1226  }
1227  if(!status)
1228  {
1229  return(NULL);
1230  }
1231  return (const void *)myRecordPtr;
1232 }

References StatGenStatus::SUCCESS.

◆ getReference()

GenomeSequence * SamRecord::getReference ( )

Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set).

Returns
pointer to the GenomeSequence object or NULL if there isn't one.

Definition at line 1923 of file SamRecord.cpp.

1924 {
1925  return(myRefPtr);
1926 }

Referenced by SamValidator::isValidTags().

◆ getReferenceID()

int32_t SamRecord::getReferenceID ( )

Get the reference sequence id of the record (BAM format rid).

Returns
reference sequence id

Definition at line 1305 of file SamRecord.cpp.

1306 {
1307  myStatus = SamStatus::SUCCESS;
1308  return myRecordPtr->myReferenceID;
1309 }

References StatGenStatus::SUCCESS.

Referenced by SamCoordOutput::add(), SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and SamFile::validateSortOrder().

◆ getReferenceName()

const char * SamRecord::getReferenceName ( )

Get the reference sequence name (RNAME) of the record.

Returns
reference sequence name

Definition at line 1298 of file SamRecord.cpp.

1299 {
1300  myStatus = SamStatus::SUCCESS;
1301  return myReferenceName.c_str();
1302 }

References StatGenStatus::SUCCESS.

Referenced by PileupElement::addEntry(), SamTags::createMDTag(), getMateReferenceNameOrEqual(), getSequence(), SamValidator::isValid(), and SamQuerySeqWithRefIter::reset().

◆ getSequence() [1/4]

const char * SamRecord::getSequence ( )

Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation.

Returns
sequence string.

Definition at line 1568 of file SamRecord.cpp.

1569 {
1570  return(getSequence(mySequenceTranslation));
1571 }

Referenced by PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getFields(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getSequence(), and shiftIndelsLeft().

◆ getSequence() [2/4]

char SamRecord::getSequence ( int  index)

Get the sequence base at the specified index (0 to readLength - 1) into this sequence, translating the base as specified by setSequenceTranslation.

Throws an exception if index is out of range.

Parameters
index: index into the sequence string (0 to readLength-1).
Returns
the sequence base at the specified index into the sequence.

Definition at line 1651 of file SamRecord.cpp.

1652 {
1653  return(getSequence(index, mySequenceTranslation));
1654 }

References getSequence().

◆ getSequence() [3/4]

char SamRecord::getSequence ( int  index,
SequenceTranslation  translation 
)

Get the sequence base at the specified index (0 to readLength - 1) into this sequence, performing the specified sequence translation.

Throws an exception if index is out of range.

Parameters
index: index into the sequence string (0 to readLength-1).
translation: type of sequence translation to use.
Returns
the sequence base at the specified index into the sequence.

Definition at line 1657 of file SamRecord.cpp.

1658 {
1659  static const char * asciiBases = "=AC.G...T......N";
1660 
1661  // Determine the read length.
1662  int32_t readLen = getReadLength();
1663 
1664  // If the read length is 0, this method should not be called.
1665  if(readLen == 0)
1666  {
1667  String exceptionString = "SamRecord::getSequence(";
1668  exceptionString += index;
1669  exceptionString += ") is not allowed since sequence = '*'";
1670  throw std::runtime_error(exceptionString.c_str());
1671  }
1672  else if((index < 0) || (index >= readLen))
1673  {
1674  // Only get here if the index was out of range, so thow an exception.
1675  String exceptionString = "SamRecord::getSequence(";
1676  exceptionString += index;
1677  exceptionString += ") is out of range. Index must be between 0 and ";
1678  exceptionString += (readLen - 1);
1679  throw std::runtime_error(exceptionString.c_str());
1680  }
1681 
1682  // Determine if translation needs to be done.
1683  if((translation == NONE) || (myRefPtr == NULL))
1684  {
1685  // No translation needs to be done.
1686  if(mySequence.Length() == 0)
1687  {
1688  // Parse BAM sequence.
1689  if(myIsSequenceBufferValid)
1690  {
1691  return(index & 1 ?
1692  asciiBases[myPackedSequence[index / 2] & 0xF] :
1693  asciiBases[myPackedSequence[index / 2] >> 4]);
1694  }
1695  else
1696  {
1697  String exceptionString = "SamRecord::getSequence(";
1698  exceptionString += index;
1699  exceptionString += ") called with no sequence set";
1700  throw std::runtime_error(exceptionString.c_str());
1701  }
1702  }
1703  // Already have string.
1704  return(mySequence[index]);
1705  }
1706  else
1707  {
1708  // Need to translate the sequence either to have '=' or to not
1709  // have it.
1710  // First check to see if the sequence has been set.
1711  if(mySequence.Length() == 0)
1712  {
1713  // 0 Length, means that it is in the buffer, but has not yet
1714  // been synced to the string, so do the sync.
1715  setSequenceAndQualityFromBuffer();
1716  }
1717 
1718  // Check the type of translation.
1719  if(translation == EQUAL)
1720  {
1721  // Check whether or not the string has already been
1722  // retrieved that has the '=' in it.
1723  if(mySeqWithEq.length() == 0)
1724  {
1725  // The string with '=' has not yet been determined,
1726  // so get the string.
1727  // Check to see if the sequence is defined.
1728  if(mySequence == "*")
1729  {
1730  // Sequence is undefined, so no translation necessary.
1731  mySeqWithEq = '*';
1732  }
1733  else
1734  {
1735  // Sequence defined, so translate it.
1736  SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
1737  myRecordPtr->myPosition,
1738  *(getCigarInfo()),
1739  getReferenceName(),
1740  *myRefPtr,
1741  mySeqWithEq);
1742  }
1743  }
1744  // Sequence is set, so return it.
1745  return(mySeqWithEq[index]);
1746  }
1747  else
1748  {
1749  // translation == BASES
1750  // Check whether or not the string has already been
1751  // retrieved that does not have the '=' in it.
1752  if(mySeqWithoutEq.length() == 0)
1753  {
1754  // The string with '=' has not yet been determined,
1755  // so get the string.
1756  // Check to see if the sequence is defined.
1757  if(mySequence == "*")
1758  {
1759  // Sequence is undefined, so no translation necessary.
1760  mySeqWithoutEq = '*';
1761  }
1762  else
1763  {
1764  // Sequence defined, so translate it.
1765  // The string without '=' has not yet been determined,
1766  // so get the string.
1767  SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
1768  myRecordPtr->myPosition,
1769  *(getCigarInfo()),
1770  getReferenceName(),
1771  *myRefPtr,
1772  mySeqWithoutEq);
1773  }
1774  }
1775  // Sequence is set, so return it.
1776  return(mySeqWithoutEq[index]);
1777  }
1778  }
1779 }
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
Definition: SamRecord.cpp:1836

References EQUAL, getCigarInfo(), getReadLength(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().

◆ getSequence() [4/4]

const char * SamRecord::getSequence ( SequenceTranslation  translation)

Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation.

Parameters
translation: type of sequence translation to use.
Returns
sequence string.

Definition at line 1574 of file SamRecord.cpp.

1575 {
1576  myStatus = SamStatus::SUCCESS;
1577  if(mySequence.Length() == 0)
1578  {
1579  // 0 Length, means that it is in the buffer, but has not yet
1580  // been synced to the string, so do the sync.
1581  setSequenceAndQualityFromBuffer();
1582  }
1583 
1584  // Determine if translation needs to be done.
1585  if((translation == NONE) || (myRefPtr == NULL))
1586  {
1587  return mySequence.c_str();
1588  }
1589  else if(translation == EQUAL)
1590  {
1591  if(mySeqWithEq.length() == 0)
1592  {
1593  // Check to see if the sequence is defined.
1594  if(mySequence == "*")
1595  {
1596  // Sequence is undefined, so no translation necessary.
1597  mySeqWithEq = '*';
1598  }
1599  else
1600  {
1601  // Sequence defined, so translate it.
1602  SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
1603  myRecordPtr->myPosition,
1604  *(getCigarInfo()),
1605  getReferenceName(),
1606  *myRefPtr,
1607  mySeqWithEq);
1608  }
1609  }
1610  return(mySeqWithEq.c_str());
1611  }
1612  else
1613  {
1614  // translation == BASES
1615  if(mySeqWithoutEq.length() == 0)
1616  {
1617  if(mySequence == "*")
1618  {
1619  // Sequence is undefined, so no translation necessary.
1620  mySeqWithoutEq = '*';
1621  }
1622  else
1623  {
1624  // Sequence defined, so translate it.
1625  SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
1626  myRecordPtr->myPosition,
1627  *(getCigarInfo()),
1628  getReferenceName(),
1629  *myRefPtr,
1630  mySeqWithoutEq);
1631  }
1632  }
1633  return(mySeqWithoutEq.c_str());
1634  }
1635 }

References EQUAL, getCigarInfo(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), SamQuerySeqWithRef::seqWithoutEquals(), and StatGenStatus::SUCCESS.

◆ getStatus()

const SamStatus & SamRecord::getStatus ( )

Returns the status associated with the last method that sets the status.

Returns
SamStatus of the last command that sets status.

Definition at line 2403 of file SamRecord.cpp.

2404 {
2405  return(myStatus);
2406 }

◆ getStringTag()

const String * SamRecord::getStringTag ( const char *  tag)

Get the string value for the specified tag.

Parameters
tagtag to retrieve
Returns
pointer to the tag's string value if found, NULL if not found.

Definition at line 2180 of file SamRecord.cpp.

2181 {
2182  // Parse the buffer if necessary.
2183  if(myNeedToSetTagsFromBuffer)
2184  {
2185  if(!setTagsFromBuffer())
2186  {
2187  // Failed to read the tags from the buffer, so cannot
2188  // get tags. setTagsFromBuffer set the errors,
2189  // so just return null.
2190  return(NULL);
2191  }
2192  }
2193 
2194  int key = MAKEKEY(tag[0], tag[1], 'Z');
2195  int offset = extras.Find(key);
2196 
2197  int value;
2198  if (offset < 0)
2199  {
2200  // Check for 'B' tag.
2201  key = MAKEKEY(tag[0], tag[1], 'B');
2202  offset = extras.Find(key);
2203  if(offset < 0)
2204  {
2205  // Tag not found.
2206  return(NULL);
2207  }
2208  }
2209 
2210  // Offset is valid, so return the tag.
2211  value = extras[offset];
2212  return(&(strings[value]));
2213 }

Referenced by SamTags::isMDTagCorrect(), and SamValidator::isValidTags().

◆ getTagLength()

uint32_t SamRecord::getTagLength ( )

Returns the length of the BAM formatted tags.

Returns
length of the BAM formatted tags.

Definition at line 1929 of file SamRecord.cpp.

1930 {
1931  myStatus = SamStatus::SUCCESS;
1932  if(myNeedToSetTagsFromBuffer)
1933  {
1934  // Tags are only set in the buffer, so the size of the tags is
1935  // the length of the record minus the starting location of the tags.
1936  unsigned char * tagStart =
1937  (unsigned char *)myRecordPtr->myData
1938  + myRecordPtr->myReadNameLength
1939  + myRecordPtr->myCigarLength * sizeof(int)
1940  + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
1941 
1942  // The non-tags take up from the start of the record to the tag start.
1943  // Do not include the block size part of the record since it is not
1944  // included in the size.
1945  uint32_t nonTagSize =
1946  tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
1947  // Tags take up the size of the block minus the non-tag section.
1948  uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
1949  return(tagSize);
1950  }
1951 
1952  // Tags are stored outside the buffer, so myTagBufferSize is set.
1953  return(myTagBufferSize);
1954 }

References StatGenStatus::SUCCESS.

◆ getTagsString()

bool SamRecord::getTagsString ( const char *  tags,
String returnString,
char  delim = '\t' 
)

Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE...

Sets the Status to SUCCESS when the tags are successfully returned or the tags were not found. If a different error occurred, the status is set appropriately. The delimiter between the tags to retrieve is ',' or ';'. ',' was added since the original delimiter, ';', requires the string to be quoted on the command-line.

Parameters
tagsthe tags to retrieve, formatted as TAG:TYPE,TAG:TYPE...
returnStringthe String to set (this method first clears returnString) to TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE...
delimdelimiter to use to separate two tags, default is a tab.
Returns
true if there were not any errors even if no tags were found.

Definition at line 2082 of file SamRecord.cpp.

2083 {
2084  const char* currentTagPtr = tags;
2085 
2086  returnString.Clear();
2087  myStatus = SamStatus::SUCCESS;
2088  if(myNeedToSetTagsFromBuffer)
2089  {
2090  if(!setTagsFromBuffer())
2091  {
2092  // Failed to read the tags from the buffer, so cannot
2093  // get tags.
2094  return(false);
2095  }
2096  }
2097 
2098  bool returnStatus = true;
2099 
2100  while(*currentTagPtr != '\0')
2101  {
2102  // Tags are formatted as: XY:Z
2103  // Where X is [A-Za-z], Y is [A-Za-z], and
2104  // Z is A,i,f,Z,H (cCsSI are also excepted)
2105  if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
2106  (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
2107  {
2108  myStatus.setStatus(SamStatus::INVALID,
2109  "getTagsString called with improperly formatted tags.\n");
2110  returnStatus = false;
2111  break;
2112  }
2113 
2114  // Construct the key.
2115  int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
2116  currentTagPtr[3]);
2117  // Look to see if the key exsists in the hash.
2118  int offset = extras.Find(key);
2119 
2120  if(offset >= 0)
2121  {
2122  // Offset is set, so the key was found.
2123  if(!returnString.IsEmpty())
2124  {
2125  returnString += delim;
2126  }
2127  returnString += currentTagPtr[0];
2128  returnString += currentTagPtr[1];
2129  returnString += ':';
2130  returnString += currentTagPtr[3];
2131  returnString += ':';
2132 
2133  // First if it is an integer, determine the actual type of the int.
2134  char vtype;
2135  getTypeFromKey(key, vtype);
2136 
2137  switch(vtype)
2138  {
2139  case 'i':
2140  returnString += *(int*)getIntegerPtr(offset, vtype);
2141  break;
2142  case 'f':
2143  returnString += *(float*)getFloatPtr(offset);
2144  break;
2145  case 'Z':
2146  case 'B':
2147  returnString += *(String*)getStringPtr(offset);
2148  break;
2149  default:
2150  myStatus.setStatus(SamStatus::INVALID,
2151  "rmTag called with unknown type.\n");
2152  returnStatus = false;
2153  break;
2154  };
2155  }
2156  // Increment to the next tag.
2157  if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
2158  {
2159  // Increment once more.
2160  currentTagPtr += 5;
2161  }
2162  else if(currentTagPtr[4] != '\0')
2163  {
2164  // Invalid tag format.
2165  myStatus.setStatus(SamStatus::INVALID,
2166  "rmTags called with improperly formatted tags.\n");
2167  returnStatus = false;
2168  break;
2169  }
2170  else
2171  {
2172  // Last Tag.
2173  currentTagPtr += 4;
2174  }
2175  }
2176  return(returnStatus);
2177 }

References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ isCharType()

bool SamRecord::isCharType ( char  vtype)
static

Returns whether or not the specified vtype is a char type.

Does not set SamStatus.

Parameters
vtypevalue type to check.
Returns
true if the passed in vtype is a char ('A'), false otherwise.

Definition at line 2062 of file SamRecord.cpp.

2063 {
2064  if(vtype == 'A')
2065  {
2066  return(true);
2067  }
2068  return(false);
2069 }

Referenced by SamRecordHelper::genSamTagString().

◆ isFloatType()

bool SamRecord::isFloatType ( char  vtype)
static

Returns whether or not the specified vtype is a float type.

Does not set SamStatus.

Parameters
vtypevalue type to check.
Returns
true if the passed in vtype is a float ('f'), false otherwise.

Definition at line 2052 of file SamRecord.cpp.

2053 {
2054  if(vtype == 'f')
2055  {
2056  return(true);
2057  }
2058  return(false);
2059 }

Referenced by SamRecordHelper::genSamTagString().

◆ isIntegerType()

bool SamRecord::isIntegerType ( char  vtype)
static

Returns whether or not the specified vtype is an integer type.

Does not set SamStatus.

Parameters
vtypevalue type to check.
Returns
true if the passed in vtype is an integer ('c', 'C', 's', 'S', 'i', 'I'), false otherwise.

Definition at line 2040 of file SamRecord.cpp.

2041 {
2042  if((vtype == 'c') || (vtype == 'C') ||
2043  (vtype == 's') || (vtype == 'S') ||
2044  (vtype == 'i') || (vtype == 'I'))
2045  {
2046  return(true);
2047  }
2048  return(false);
2049 }

Referenced by SamRecordHelper::genSamTagString().

◆ isStringType()

bool SamRecord::isStringType ( char  vtype)
static

Returns whether or not the specified vtype is a string type.

Does not set SamStatus.

Parameters
vtypevalue type to check.
Returns
true if the passed in vtype is a string ('Z'/'B'), false otherwise.

Definition at line 2072 of file SamRecord.cpp.

2073 {
2074  if((vtype == 'Z') || (vtype == 'B'))
2075  {
2076  return(true);
2077  }
2078  return(false);
2079 }

Referenced by SamRecordHelper::genSamTagString().

◆ isValid()

bool SamRecord::isValid ( SamFileHeader header)

Returns whether or not the record is valid, setting the status to indicate success or failure.

Parameters
headerSAM Header associated with the record. Used to perform some validation against the header.
Returns
true if the record is valid, false if not.

Definition at line 161 of file SamRecord.cpp.

162 {
163  myStatus = SamStatus::SUCCESS;
164  SamValidationErrors invalidSamErrors;
165  if(!SamValidator::isValid(header, *this, invalidSamErrors))
166  {
167  // The record is not valid.
168  std::string errorMessage = "";
169  invalidSamErrors.getErrorString(errorMessage);
170  myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
171  return(false);
172  }
173  // The record is valid.
174  return(true);
175 }
The SamValidationErrors class is a container class that holds SamValidationError Objects,...
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations.

References SamValidationErrors::getErrorString(), StatGenStatus::INVALID, SamValidator::isValid(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ resetRecord()

void SamRecord::resetRecord ( )

Reset the fields of the record to a default value.

This is not necessary when you are reading a SAM/BAM file, but if you are setting fields, it is a good idea to clean out a record before reusing it. Clearing it allows you to not have to set any empty fields.

Definition at line 91 of file SamRecord.cpp.

92 {
93  myIsBufferSynced = true;
94 
95  myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
96  myRecordPtr->myReferenceID = -1;
97  myRecordPtr->myPosition = -1;
98  myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
99  myRecordPtr->myMapQuality = 0;
100  myRecordPtr->myBin = DEFAULT_BIN;
101  myRecordPtr->myCigarLength = 0;
102  myRecordPtr->myFlag = 0;
103  myRecordPtr->myReadLength = 0;
104  myRecordPtr->myMateReferenceID = -1;
105  myRecordPtr->myMatePosition = -1;
106  myRecordPtr->myInsertSize = 0;
107 
108  // Set the sam values for the variable length fields.
109  // TODO - one way to speed this up might be to not set to "*" and just
110  // clear them, and write out a '*' for SAM if it is empty.
111  myReadName = DEFAULT_READ_NAME;
112  myReferenceName = "*";
113  myMateReferenceName = "*";
114  myCigar = "*";
115  mySequence = "*";
116  mySeqWithEq.clear();
117  mySeqWithoutEq.clear();
118  myQuality = "*";
119  myNeedToSetTagsFromBuffer = false;
120  myNeedToSetTagsInBuffer = false;
121 
122  // Initialize the calculated alignment info to the uncalculated value.
123  myAlignmentLength = -1;
124  myUnclippedStartOffset = -1;
125  myUnclippedEndOffset = -1;
126 
127  clearTags();
128 
129  // Set the bam values for the variable length fields.
130  // Only the read name needs to be set, the others are a length of 0.
131  // Set the read name. The min size of myRecordPtr includes the size for
132  // the default read name.
133  memcpy(&(myRecordPtr->myData), myReadName.c_str(),
134  myRecordPtr->myReadNameLength);
135 
136  // Set that the variable length buffer fields are valid.
137  myIsReadNameBufferValid = true;
138  myIsCigarBufferValid = true;
139  myPackedSequence =
140  (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
141  myRecordPtr->myCigarLength * sizeof(int);
142  myIsSequenceBufferValid = true;
143  myBufferSequenceTranslation = NONE;
144 
145  myPackedQuality = myPackedSequence;
146  myIsQualityBufferValid = true;
147  myIsTagsBufferValid = true;
148  myIsBinValid = true;
149 
150  myCigarTempBufferLength = -1;
151 
152  myStatus = SamStatus::SUCCESS;
153 
154  NOT_FOUND_TAG_STRING = "";
155  NOT_FOUND_TAG_INT = -1; // TODO - deprecate
156 }
void clearTags()
Clear the tags in this record.
Definition: SamRecord.cpp:977

References clearTags(), NONE, and StatGenStatus::SUCCESS.

Referenced by SamRecord(), ~SamRecord(), setBuffer(), and setBufferFromFile().

◆ rmTag()

bool SamRecord::rmTag ( const char *  tag,
char  type 
)

Remove a tag.

Parameters
tagtag to remove.
type the type of the tag to be removed.
Returns
true if the tag no longer exists in the record, false if it could not be removed (Returns true if the tag was not found in the record).

Definition at line 992 of file SamRecord.cpp.

993 {
994  // Check the length of tag.
995  if(strlen(tag) != 2)
996  {
997  // Tag is the wrong length.
998  myStatus.setStatus(SamStatus::INVALID,
999  "rmTag called with tag that is not 2 characters\n");
1000  return(false);
1001  }
1002 
1003  myStatus = SamStatus::SUCCESS;
1004  if(myNeedToSetTagsFromBuffer)
1005  {
1006  if(!setTagsFromBuffer())
1007  {
1008  // Failed to read the tags from the buffer, so cannot
1009  // get tags.
1010  return(false);
1011  }
1012  }
1013 
1014  // Construct the key.
1015  int key = MAKEKEY(tag[0], tag[1], type);
1016  // Look to see if the key exsists in the hash.
1017  int offset = extras.Find(key);
1018 
1019  if(offset < 0)
1020  {
1021  // Not found, so return true, successfully removed since
1022  // it is not in tag.
1023  return(true);
1024  }
1025 
1026  // Offset is set, so the key was found.
1027  // First if it is an integer, determine the actual type of the int.
1028  char vtype;
1029  getTypeFromKey(key, vtype);
1030  if(vtype == 'i')
1031  {
1032  vtype = getIntegerType(offset);
1033  }
1034 
1035  // Offset is set, so recalculate the buffer size without this entry.
1036  // Do NOT remove from strings, integers, or floats because then
1037  // extras would need to be updated for all entries with the new indexes
1038  // into those variables.
1039  int rmBuffSize = 0;
1040  switch(vtype)
1041  {
1042  case 'A':
1043  case 'c':
1044  case 'C':
1045  rmBuffSize = 4;
1046  break;
1047  case 's':
1048  case 'S':
1049  rmBuffSize = 5;
1050  break;
1051  case 'i':
1052  case 'I':
1053  rmBuffSize = 7;
1054  break;
1055  case 'f':
1056  rmBuffSize = 7;
1057  break;
1058  case 'Z':
1059  rmBuffSize = 4 + getString(offset).Length();
1060  break;
1061  case 'B':
1062  rmBuffSize = 3 + getBtagBufferSize(getString(offset));
1063  break;
1064  default:
1065  myStatus.setStatus(SamStatus::INVALID,
1066  "rmTag called with unknown type.\n");
1067  return(false);
1068  break;
1069  };
1070 
1071  // The buffer tags are now out of sync.
1072  myNeedToSetTagsInBuffer = true;
1073  myIsTagsBufferValid = false;
1074  myIsBufferSynced = false;
1075  myTagBufferSize -= rmBuffSize;
1076 
1077  // Remove from the hash.
1078  extras.Delete(offset);
1079  return(true);
1080 }
const String & getString(const char *tag)
Get the string value for the specified tag.
Definition: SamRecord.cpp:2314

References getString(), StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ rmTags()

bool SamRecord::rmTags ( const char *  tags)

Remove tags.

The delimiter between the tags is ',' or ';'. ',' was added since the original delimiter, ';', requires the string to be quoted on the command-line.

Parameters
tagstags to remove, formatted as Tag:Type,Tag:Type,Tag:Type...
Returns
true if all tags no longer exist in the record, false if any could not be removed (Returns true if the tags were not found in the record). SamStatus is set to INVALID if the tags are incorrectly formatted.

Definition at line 1083 of file SamRecord.cpp.

1084 {
1085  const char* currentTagPtr = tags;
1086 
1087  myStatus = SamStatus::SUCCESS;
1088  if(myNeedToSetTagsFromBuffer)
1089  {
1090  if(!setTagsFromBuffer())
1091  {
1092  // Failed to read the tags from the buffer, so cannot
1093  // get tags.
1094  return(false);
1095  }
1096  }
1097 
1098  bool returnStatus = true;
1099 
1100  int rmBuffSize = 0;
1101  while(*currentTagPtr != '\0')
1102  {
1103 
1104  // Tags are formatted as: XY:Z
1105  // Where X is [A-Za-z], Y is [A-Za-z], and
1106  // Z is A,i,f,Z,H (cCsSI are also excepted)
1107  if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
1108  (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
1109  {
1110  myStatus.setStatus(SamStatus::INVALID,
1111  "rmTags called with improperly formatted tags.\n");
1112  returnStatus = false;
1113  break;
1114  }
1115 
1116  // Construct the key.
1117  int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
1118  currentTagPtr[3]);
1119  // Look to see if the key exsists in the hash.
1120  int offset = extras.Find(key);
1121 
1122  if(offset >= 0)
1123  {
1124  // Offset is set, so the key was found.
1125  // First if it is an integer, determine the actual type of the int.
1126  char vtype;
1127  getTypeFromKey(key, vtype);
1128  if(vtype == 'i')
1129  {
1130  vtype = getIntegerType(offset);
1131  }
1132 
1133  // Offset is set, so recalculate the buffer size without this entry.
1134  // Do NOT remove from strings, integers, or floats because then
1135  // extras would need to be updated for all entries with the new indexes
1136  // into those variables.
1137  switch(vtype)
1138  {
1139  case 'A':
1140  case 'c':
1141  case 'C':
1142  rmBuffSize += 4;
1143  break;
1144  case 's':
1145  case 'S':
1146  rmBuffSize += 5;
1147  break;
1148  case 'i':
1149  case 'I':
1150  rmBuffSize += 7;
1151  break;
1152  case 'f':
1153  rmBuffSize += 7;
1154  break;
1155  case 'Z':
1156  rmBuffSize += 4 + getString(offset).Length();
1157  break;
1158  case 'B':
1159  rmBuffSize += 3 + getBtagBufferSize(getString(offset));
1160  break;
1161  default:
1162  myStatus.setStatus(SamStatus::INVALID,
1163  "rmTag called with unknown type.\n");
1164  returnStatus = false;
1165  break;
1166  };
1167 
1168  // Remove from the hash.
1169  extras.Delete(offset);
1170  }
1171  // Increment to the next tag.
1172  if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
1173  {
1174  // Increment once more.
1175  currentTagPtr += 5;
1176  }
1177  else if(currentTagPtr[4] != '\0')
1178  {
1179  // Invalid tag format.
1180  myStatus.setStatus(SamStatus::INVALID,
1181  "rmTags called with improperly formatted tags.\n");
1182  returnStatus = false;
1183  break;
1184  }
1185  else
1186  {
1187  // Last Tag.
1188  currentTagPtr += 4;
1189  }
1190  }
1191 
1192  // The buffer tags are now out of sync.
1193  myNeedToSetTagsInBuffer = true;
1194  myIsTagsBufferValid = false;
1195  myIsBufferSynced = false;
1196  myTagBufferSize -= rmBuffSize;
1197 
1198 
1199  return(returnStatus);
1200 }

References getString(), StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ set0BasedMatePosition()

bool SamRecord::set0BasedMatePosition ( int32_t  matePosition)

Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
matePosition 0-based start position of the mate/next fragment
Returns
true if successfully set, false if not.

Definition at line 328 of file SamRecord.cpp.

329 {
330  myStatus = SamStatus::SUCCESS;
331  myRecordPtr->myMatePosition = matePosition;
332  return true;
333 }

References StatGenStatus::SUCCESS.

Referenced by set1BasedMatePosition().

◆ set0BasedPosition()

bool SamRecord::set0BasedPosition ( int32_t  position)

Set the leftmost position using the specified 0-based (BAM format) value.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
position0-based start position
Returns
true if successfully set, false if not.

Definition at line 242 of file SamRecord.cpp.

243 {
244  myStatus = SamStatus::SUCCESS;
245  myRecordPtr->myPosition = position;
246  myIsBinValid = false;
247  return true;
248 }

References StatGenStatus::SUCCESS.

Referenced by set1BasedPosition(), and SamFilter::softClip().

◆ set1BasedMatePosition()

bool SamRecord::set1BasedMatePosition ( int32_t  matePosition)

Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
matePosition 1-based start position of the mate/next fragment
Returns
true if successfully set, false if not.

Definition at line 322 of file SamRecord.cpp.

323 {
324  return(set0BasedMatePosition(matePosition - 1));
325 }
bool set0BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
Definition: SamRecord.cpp:328

References set0BasedMatePosition().

◆ set1BasedPosition()

bool SamRecord::set1BasedPosition ( int32_t  position)

Set the leftmost position (POS) using the specified 1-based (SAM format) value.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
position1-based start position
Returns
true if successfully set, false if not.

Definition at line 236 of file SamRecord.cpp.

237 {
238  return(set0BasedPosition(position - 1));
239 }
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
Definition: SamRecord.cpp:242

References set0BasedPosition().

◆ setBuffer()

SamStatus::Status SamRecord::setBuffer ( const char *  fromBuffer,
uint32_t  fromBufferSize,
SamFileHeader header 
)

Sets the SamRecord to contain the information in the BAM formatted fromBuffer.

Parameters
fromBufferbuffer to read the BAM record from.
fromBufferSizesize of the buffer containing the BAM record.
headerBAM header for the record.
Returns
status of reading the BAM record from the buffer.

Definition at line 525 of file SamRecord.cpp.

528 {
529  myStatus = SamStatus::SUCCESS;
530  if((fromBuffer == NULL) || (fromBufferSize == 0))
531  {
532  // Buffer is empty.
533  myStatus.setStatus(SamStatus::FAIL_PARSE,
534  "Cannot parse an empty file.");
535  return(SamStatus::FAIL_PARSE);
536  }
537 
538  // Clear the record.
539  resetRecord();
540 
541  // allocate space for the record size.
542  if(!allocateRecordStructure(fromBufferSize))
543  {
544  // Failed to allocate space.
545  return(SamStatus::FAIL_MEM);
546  }
547 
548  memcpy(myRecordPtr, fromBuffer, fromBufferSize);
549 
550  setVariablesForNewBuffer(header);
551 
552  // Return the status of the record.
553  return(SamStatus::SUCCESS);
554 }
@ FAIL_MEM
fail a memory allocation.
Definition: StatGenStatus.h:45

References StatGenStatus::FAIL_MEM, StatGenStatus::FAIL_PARSE, resetRecord(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ setBufferFromFile()

SamStatus::Status SamRecord::setBufferFromFile ( IFILE  filePtr,
SamFileHeader header 
)

Read the BAM record from a file.

Parameters
filePtrfile to read the buffer from.
headerBAM header for the record.
Returns
status of reading the BAM record from the file.

Definition at line 558 of file SamRecord.cpp.

560 {
561  myStatus = SamStatus::SUCCESS;
562  if((filePtr == NULL) || (filePtr->isOpen() == false))
563  {
564  // File is not open, return failure.
565  myStatus.setStatus(SamStatus::FAIL_ORDER,
566  "Can't read from an unopened file.");
567  return(SamStatus::FAIL_ORDER);
568  }
569 
570  // Clear the record.
571  resetRecord();
572 
573  // read the record size.
574  int numBytes =
575  ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
576 
577  // Check to see if the end of the file was hit and no bytes were read.
578  if(ifeof(filePtr) && (numBytes == 0))
579  {
580  // End of file, nothing was read, no more records.
581  std::string statusMsg = "No more records left to read, ";
582  statusMsg += filePtr->getFileName();
583  statusMsg += ".";
584  myStatus.setStatus(SamStatus::NO_MORE_RECS,
585  statusMsg.c_str());
586  return(SamStatus::NO_MORE_RECS);
587  }
588 
589  if(numBytes != sizeof(int32_t))
590  {
591  // Failed to read the entire block size. Either the end of the file
592  // was reached early or there was an error.
593  if(ifeof(filePtr))
594  {
595  // Error: end of the file reached prior to reading the rest of the
596  // record.
597  std::string statusMsg = "EOF reached in the middle of a record, ";
598  statusMsg += filePtr->getFileName();
599  statusMsg += ".";
600  myStatus.setStatus(SamStatus::FAIL_PARSE,
601  statusMsg.c_str());
602  return(SamStatus::FAIL_PARSE);
603  }
604  else
605  {
606  // Error reading.
607  std::string statusMsg = "Failed to read the record size, ";
608  statusMsg += filePtr->getFileName();
609  statusMsg += ".";
610  myStatus.setStatus(SamStatus::FAIL_IO,
611  statusMsg.c_str());
612  return(SamStatus::FAIL_IO);
613  }
614  }
615 
616  // allocate space for the record size.
617  if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
618  {
619  // Failed to allocate space.
620  // Status is set by allocateRecordStructure.
621  return(SamStatus::FAIL_MEM);
622  }
623 
624  // Read the rest of the alignment block, starting at the reference id.
625  if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
626  != (unsigned int)myRecordPtr->myBlockSize)
627  {
628  // Error reading the record. Reset it and return failure.
629  resetRecord();
630  std::string statusMsg = "Failed to read the record, ";
631  statusMsg += filePtr->getFileName();
632  statusMsg += ".";
633  myStatus.setStatus(SamStatus::FAIL_IO,
634  statusMsg.c_str());
635  return(SamStatus::FAIL_IO);
636  }
637 
638  setVariablesForNewBuffer(header);
639 
640  // Return the status of the record.
641  return(SamStatus::SUCCESS);
642 }
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition: InputFile.h:654
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition: InputFile.h:600
const char * getFileName() const
Get the filename that is currently opened.
Definition: InputFile.h:473
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition: InputFile.h:423
@ NO_MORE_RECS
NO_MORE_RECS: failed to read a record since there are no more to read either in the file or section i...
Definition: StatGenStatus.h:36
@ FAIL_IO
method failed due to an I/O issue.
Definition: StatGenStatus.h:37
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...
Definition: StatGenStatus.h:41

References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_MEM, StatGenStatus::FAIL_ORDER, StatGenStatus::FAIL_PARSE, InputFile::getFileName(), ifeof(), ifread(), InputFile::isOpen(), StatGenStatus::NO_MORE_RECS, resetRecord(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ setCigar() [1/2]

bool SamRecord::setCigar ( const char *  cigar)

Set the CIGAR to the specified SAM formatted cigar string.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
cigarstring containing the SAM formatted cigar.
Returns
true if successfully set, false if not.

Definition at line 259 of file SamRecord.cpp.

260 {
261  myStatus = SamStatus::SUCCESS;
262  myCigar = cigar;
263 
264  myIsBufferSynced = false;
265  myIsCigarBufferValid = false;
266  myCigarTempBufferLength = -1;
267  myIsBinValid = false;
268 
269  // Initialize the calculated alignment info to the uncalculated value.
270  myAlignmentLength = -1;
271  myUnclippedStartOffset = -1;
272  myUnclippedEndOffset = -1;
273 
274  return true;
275 }

References StatGenStatus::SUCCESS.

Referenced by SamFilter::filterRead(), shiftIndelsLeft(), and SamFilter::softClip().

◆ setCigar() [2/2]

bool SamRecord::setCigar ( const Cigar cigar)

Set the CIGAR to the specified Cigar object.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
cigarobject to set this record's cigar to have.
Returns
true if successfully set, false if not.

Definition at line 278 of file SamRecord.cpp.

279 {
280  myStatus = SamStatus::SUCCESS;
281  cigar.getCigarString(myCigar);
282 
283  myIsBufferSynced = false;
284  myIsCigarBufferValid = false;
285  myCigarTempBufferLength = -1;
286  myIsBinValid = false;
287 
288  // Initialize the calculated alignment info to the uncalculated value.
289  myAlignmentLength = -1;
290  myUnclippedStartOffset = -1;
291  myUnclippedEndOffset = -1;
292 
293  return true;
294 }
void getCigarString(String &cigarString) const
Set the passed in String to the string representation of the Cigar operations in this object.
Definition: Cigar.cpp:52

References Cigar::getCigarString(), and StatGenStatus::SUCCESS.

◆ setFlag()

bool SamRecord::setFlag ( uint16_t  flag)

Set the bitwise FLAG to the specified value.

Parameters
flaginteger flag to use.
Returns
true if successfully set, false if not.

Definition at line 215 of file SamRecord.cpp.

216 {
217  myStatus = SamStatus::SUCCESS;
218  myRecordPtr->myFlag = flag;
219  return true;
220 }

References StatGenStatus::SUCCESS.

Referenced by SamFilter::filterRead().

◆ setInsertSize()

bool SamRecord::setInsertSize ( int32_t  insertSize)

Sets the inferred insert size (ISIZE)/observed template length (TLEN).

Parameters
insertSizeinferred insert size/observed template length.
Returns
true if successfully set, false if not.

Definition at line 336 of file SamRecord.cpp.

337 {
338  myStatus = SamStatus::SUCCESS;
339  myRecordPtr->myInsertSize = insertSize;
340  return true;
341 }

References StatGenStatus::SUCCESS.

◆ setMapQuality()

bool SamRecord::setMapQuality ( uint8_t  mapQuality)

Set the mapping quality (MAPQ).

Parameters
mapQualitymap quality to set in the record.
Returns
true if successfully set, false if not.

Definition at line 251 of file SamRecord.cpp.

252 {
253  myStatus = SamStatus::SUCCESS;
254  myRecordPtr->myMapQuality = mapQuality;
255  return true;
256 }

References StatGenStatus::SUCCESS.

Referenced by SamFilter::filterRead().

◆ setMateReferenceName()

bool SamRecord::setMateReferenceName ( SamFileHeader header,
const char *  mateReferenceName 
)

Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id.

Parameters
headerSAM/BAM header to use to determine the mate reference id.
mateReferenceName mate reference name to use.
Returns
true if successfully set, false if not

Definition at line 297 of file SamRecord.cpp.

299 {
300  myStatus = SamStatus::SUCCESS;
301  // Set the mate reference, if it is "=", set it to be equal
302  // to myReferenceName. This assumes that myReferenceName has already
303  // been called.
304  if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
305  {
306  myMateReferenceName = myReferenceName;
307  }
308  else
309  {
310  myMateReferenceName = mateReferenceName;
311  }
312 
313  // Set the Mate Reference ID.
314  // If the reference ID does not already exist, add it (pass true)
315  myRecordPtr->myMateReferenceID =
316  header.getReferenceID(myMateReferenceName, true);
317 
318  return true;
319 }
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified reference name (chromosome).

References SamFileHeader::getReferenceID(), and StatGenStatus::SUCCESS.

◆ setQuality()

bool SamRecord::setQuality ( const char *  quality)

Sets the quality (QUAL) to the specified SAM formatted quality string.

Internal processing handles switching between SAM/BAM formats when read/written.

Parameters
qualitySAM quality string.
Returns
true if successfully set, false if not.

Definition at line 357 of file SamRecord.cpp.

358 {
359  myStatus = SamStatus::SUCCESS;
360  myQuality = quality;
361  myIsBufferSynced = false;
362  myIsQualityBufferValid = false;
363  return true;
364 }

References StatGenStatus::SUCCESS.

◆ setReadName()

bool SamRecord::setReadName ( const char *  readName)

Set QNAME to the passed in name.

Parameters
readNamethe readname to set the QNAME to.
Returns
true if successfully set, false if not.

Definition at line 193 of file SamRecord.cpp.

194 {
195  myReadName = readName;
196  myIsBufferSynced = false;
197  myIsReadNameBufferValid = false;
198  myStatus = SamStatus::SUCCESS;
199 
200  // The read name must at least have some length, otherwise this is a parsing
201  // error.
202  if(myReadName.Length() == 0)
203  {
204  // Invalid - reset ReadName return false.
205  myReadName = DEFAULT_READ_NAME;
206  myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
207  myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
208  return(false);
209  }
210 
211  return true;
212 }

References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ setReference()

void SamRecord::setReference ( GenomeSequence *  reference)

Set the reference to the specified genome sequence object.

Parameters
reference: pointer to the GenomeSequence object.

Definition at line 178 of file SamRecord.cpp.

179 {
180  myRefPtr = reference;
181 }

Referenced by SamFile::GetNumOverlaps(), SamFile::ReadRecord(), SamFile::validateSortOrder(), and SamFile::WriteRecord().

◆ setReferenceName()

bool SamRecord::setReferenceName ( SamFileHeader &  header,
const char *  referenceName 
)

Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id.

Parameters
header: SAM/BAM header to use to determine the reference id.
referenceName: reference name to use.
Returns
true if successfully set, false if not

Definition at line 223 of file SamRecord.cpp.

225 {
226  myStatus = SamStatus::SUCCESS;
227 
228  myReferenceName = referenceName;
229  // If the reference ID does not already exist, add it (pass true)
230  myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
231 
232  return true;
233 }

References SamFileHeader::getReferenceID() and StatGenStatus::SUCCESS.

◆ setSequence()

bool SamRecord::setSequence ( const char *  seq)

Sets the sequence (SEQ) to the specified SAM formatted sequence string.

Internal processing handles switching between SAM/BAM formats when read/written.

Parameters
seq: SAM sequence string. May contain '='.
Returns
true if successfully set, false if not.

Definition at line 344 of file SamRecord.cpp.

345 {
346  myStatus = SamStatus::SUCCESS;
347  mySequence = seq;
348  mySeqWithEq.clear();
349  mySeqWithoutEq.clear();
350 
351  myIsBufferSynced = false;
352  myIsSequenceBufferValid = false;
353  return true;
354 }

References StatGenStatus::SUCCESS.

◆ setSequenceTranslation()

void SamRecord::setSequenceTranslation ( SequenceTranslation  translation)

Set the type of sequence translation to use when getting the sequence.

The default type (if this method is never called) is NONE (the sequence is left as-is). It can be overridden by using the accessors that take a SequenceTranslation parameter.

Parameters
translation: type of sequence translation to use.

Definition at line 187 of file SamRecord.cpp.

188 {
189  mySequenceTranslation = translation;
190 }

Referenced by SamFile::GetNumOverlaps(), SamFile::ReadRecord(), and SamFile::validateSortOrder().

◆ shiftIndelsLeft()

bool SamRecord::shiftIndelsLeft ( )

Shift the indels (if any) to the left by updating the CIGAR.

Returns
true if the cigar was shifted, false if not.

Definition at line 368 of file SamRecord.cpp.

369 {
370  // Check to see whether or not the Cigar has already been
371  // set - this is determined by checking if alignment length
372  // is set since alignment length and the cigar are set
373  // at the same time.
374  if(myAlignmentLength == -1)
375  {
376  // Not been set, so calculate it.
377  parseCigar();
378  }
379 
380  // Track whether or not there was a shift.
381  bool shifted = false;
382 
383  // Cigar is set, so now myCigarRoller can be used.
384  // Track where in the read we are.
385  uint32_t currentPos = 0;
386 
387  // Since the loop starts at 1 because the first operation can't be shifted,
388  // increment the currentPos past the first operation.
389  if(Cigar::foundInQuery(myCigarRoller[0]))
390  {
391  // This op was found in the read, increment the current position.
392  currentPos += myCigarRoller[0].count;
393  }
394 
395  int numOps = myCigarRoller.size();
396 
397  // Loop through the cigar operations from the 2nd operation since
398  // the first operation is already on the end and can't shift.
399  for(int currentOp = 1; currentOp < numOps; currentOp++)
400  {
401  if(myCigarRoller[currentOp].operation == Cigar::insert)
402  {
403  // For now, only shift a max of 1 operation.
404  int prevOpIndex = currentOp-1;
405  // Track the next op for seeing if it is the same as the
406  // previous for merging reasons.
407  int nextOpIndex = currentOp+1;
408  if(nextOpIndex == numOps)
409  {
410  // There is no next op, so set it equal to the current one.
411  nextOpIndex = currentOp;
412  }
413  // The start of the previous operation, so we know when we hit it
414  // so we don't shift past it.
415  uint32_t prevOpStart =
416  currentPos - myCigarRoller[prevOpIndex].count;
417 
418  // We can only shift if the previous operation is a match/mismatch.
419  if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
420  {
421  // TODO - shift past pads
422  // An insert is in the read, so increment the position.
423  currentPos += myCigarRoller[currentOp].count;
424  // Not a match/mismatch, so can't shift into it.
425  continue;
426  }
427 
428  // It is a match or mismatch, so check to see if we can
429  // shift into it.
430 
431  // The end of the insert is calculated by adding the size
432  // of this insert minus 1 to the start of the insert.
433  uint32_t insertEndPos =
434  currentPos + myCigarRoller[currentOp].count - 1;
435 
436  // The insert starts at the current position.
437  uint32_t insertStartPos = currentPos;
438 
439  // Loop as long as the position before the insert start
440  // matches the last character in the insert. If they match,
441  // the insert can be shifted one index left because the
442  // implied reference will not change. If they do not match,
443  // we can't shift because the implied reference would change.
444  // Stop loop when insertStartPos = prevOpStart, because we
445  // don't want to move past that.
446  while((insertStartPos > prevOpStart) &&
447  (getSequence(insertEndPos,BASES) ==
448  getSequence(insertStartPos - 1, BASES)))
449  {
450  // We can shift, so move the insert start & end one left.
451  --insertEndPos;
452  --insertStartPos;
453  }
454 
455  // Determine if a shift has occurred.
456  int shiftLen = currentPos - insertStartPos;
457  if(shiftLen > 0)
458  {
459  // Shift occurred, so adjust the cigar if the cigar will
460  // not become more operations.
461  // If the next operation is the same as the previous or
462  // if the insert and the previous operation switch positions
463  // then the cigar has the same number of operations.
464  // If the next operation is different, and the shift splits
465  // the previous operation in 2, then the cigar would
466  // become longer, so we do not want to shift.
467  if(myCigarRoller[nextOpIndex].operation ==
468  myCigarRoller[prevOpIndex].operation)
469  {
470  // The operations are the same, so merge them by adding
471  // the length of the shift to the next operation.
472  myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
473  myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
474 
475  // If the previous op length is 0, just remove that
476  // operation.
477  if(myCigarRoller[prevOpIndex].count == 0)
478  {
479  myCigarRoller.Remove(prevOpIndex);
480  }
481  shifted = true;
482  }
483  else
484  {
485  // Can only shift if the insert shifts past the
486  // entire previous operation, otherwise an operation
487  // would need to be added.
488  if(insertStartPos == prevOpStart)
489  {
490  // Swap the positions of the insert and the
491  // previous operation.
492  myCigarRoller.Update(currentOp,
493  myCigarRoller[prevOpIndex].operation,
494  myCigarRoller[prevOpIndex].count);
495  // Size of the previous op is the entire
496  // shift length.
497  myCigarRoller.Update(prevOpIndex,
498  Cigar::insert,
499  shiftLen);
500  shifted = true;
501  }
502  }
503  }
504  // An insert is in the read, so increment the position.
505  currentPos += myCigarRoller[currentOp].count;
506  }
507  else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
508  {
509  // This op was found in the read, increment the current position.
510  currentPos += myCigarRoller[currentOp].count;
511  }
512  }
513  if(shifted)
514  {
515  // TODO - setCigar is currently inefficient because later the cigar
516  // roller will be recalculated, but for now it will work.
517  setCigar(myCigarRoller);
518  }
519  return(shifted);
520 }
bool Remove(int index)
Remove the operation at the specified index.
bool IncrementCount(int index, int increment)
Increments the count for the operation at the specified index by the specified value,...
bool Update(int index, Operation op, int count)
Updates the operation at the specified index to be the specified operation and have the specified cou...
int size() const
Return the number of cigar operations.
Definition: Cigar.h:364
static bool isMatchOrMismatch(Operation op)
Return true if the specified operation is a match/mismatch operation, false if not.
Definition: Cigar.h:298
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
Definition: Cigar.h:219
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
Definition: Cigar.h:91
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
Definition: SamRecord.cpp:259

References BASES, Cigar::foundInQuery(), getSequence(), CigarRoller::IncrementCount(), Cigar::insert, Cigar::isMatchOrMismatch(), CigarRoller::Remove(), setCigar(), Cigar::size(), and CigarRoller::Update().

◆ writeRecordBuffer() [1/2]

SamStatus::Status SamRecord::writeRecordBuffer ( IFILE  filePtr)

Write the record as a BAM into the specified already opened file.

Parameters
filePtr: file to write the BAM record into.
Returns
status of the write.

Definition at line 1237 of file SamRecord.cpp.

1238 {
1239  return(writeRecordBuffer(filePtr, mySequenceTranslation));
1240 }
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
Definition: SamRecord.cpp:1237

◆ writeRecordBuffer() [2/2]

SamStatus::Status SamRecord::writeRecordBuffer ( IFILE  filePtr,
SequenceTranslation  translation 
)

Write the record as a BAM into the specified already opened file using the specified translation on the sequence.

Parameters
filePtr: file to write the BAM record into.
translation: type of sequence translation to use.
Returns
status of the write.

Definition at line 1244 of file SamRecord.cpp.

1246 {
1247  myStatus = SamStatus::SUCCESS;
1248  if((filePtr == NULL) || (filePtr->isOpen() == false))
1249  {
1250  // File is not open, return failure.
1251  myStatus.setStatus(SamStatus::FAIL_ORDER,
1252  "Can't write to an unopened file.");
1253  return(SamStatus::FAIL_ORDER);
1254  }
1255 
1256  if((myIsBufferSynced == false) ||
1257  (myBufferSequenceTranslation != translation))
1258  {
1259  if(!fixBuffer(translation))
1260  {
1261  return(myStatus.getStatus());
1262  }
1263  }
1264 
1265  // Write the record.
1266  unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
1267  unsigned int numBytesWritten =
1268  ifwrite(filePtr, myRecordPtr, numBytesToWrite);
1269 
1270  // Return status based on if the correct number of bytes were written.
1271  if(numBytesToWrite == numBytesWritten)
1272  {
1273  return(SamStatus::SUCCESS);
1274  }
1275  // The correct number of bytes were not written.
1276  myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
1277  return(SamStatus::FAIL_IO);
1278 }
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition: InputFile.h:669
Status getStatus() const
Return the enum for this status object.

References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_ORDER, StatGenStatus::getStatus(), ifwrite(), InputFile::isOpen(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.


The documentation for this class was generated from the following files: