libStatGen Software  1
TestEquals.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "TestEquals.h"
19 #include "SamQuerySeqWithRefHelper.h"
20 #include <assert.h>
21 
22 
23 
24 void testSeqEquals()
25 {
26  // Call generic test which since the sam and bam are identical, should
27  // contain the same results.
28  EqualsTest::testEq(EqualsTest::SAM);
29 #ifdef __ZLIB_AVAILABLE__
30  EqualsTest::testEq(EqualsTest::BAM);
31 #endif
32 }
33 
34 
35 const char* EqualsTest::READ_NAMES[] =
36  {"01:====", "02:===X", "03:==X=", "04:==XX", "05:=X==", "06:=X=X",
37  "07:=XX=", "08:=XXX", "09:X===", "10:X==X", "11:X=X=", "12:X=XX",
38  "13:XX==", "14:XX=X", "15:XXX=", "16:XXXX", "Read:GGCCTA;Ref:CCTA",
39  "Read:CCTA;Ref:CCTA", "Read:CCGTxxxC;Ref:CCxTAACC",
40  "Read:CCxxAC;Ref:CCTAACC", "chromNotInRef", "chromNotInRef1"};
41 
42 const char* EqualsTest::READ_SEQS_BASES[] =
43  {"CCTA", "CCTT", "CCAA", "CCAT", "CTTA", "CTTT", "CTAA", "CTAT", "TCTA", "TCTT", "TCAA", "TCAT", "TTTA", "TTTT", "TTAA", "TTAT", "GGCCTA",
44  "CCTA", "CCGTC", "CCAC", "CCTA", "CC=A"};
45 
46 const char* EqualsTest::READ_SEQS_EQUALS[] =
47  {"====", "===T", "==A=", "==AT", "=T==", "=T=T", "=TA=", "=TAT", "T===", "T==T", "T=A=", "T=AT", "TT==", "TT=T", "TTA=", "TTAT", "GG====",
48  "====", "==G==", "====", "CCTA", "CC=A"};
49 
50 const char* EqualsTest::READ_SEQS_MIXED[] =
51  {"C===", "=C=T", "==AA", "==AT", "=TTA", "CT=T", "=TAA", "=TAT", "T=TA", "TC=T", "TCA=", "TCAT", "TT=A", "TT=T", "TTA=", "TTAT", "GGC=T=",
52  "C=T=", "C=GT=", "C=A=", "CCTA", "CC=A"};
53 
54 const char* EqualsTest::expectedReferenceName;
55 const char* EqualsTest::expectedMateReferenceName;
56 const char* EqualsTest::expectedMateReferenceNameOrEqual;
57 const char* EqualsTest::expectedCigar;
58 const char* EqualsTest::expectedQuality;
59 
60 std::vector<unsigned int> EqualsTest::expectedCigarHex;
61 
62 int EqualsTest::expected0BasedAlignmentEnd;
63 int EqualsTest::expected1BasedAlignmentEnd;
64 int EqualsTest::expectedAlignmentLength;
65 int EqualsTest::expected0BasedUnclippedStart;
66 int EqualsTest::expected1BasedUnclippedStart;
67 int EqualsTest::expected0BasedUnclippedEnd;
68 int EqualsTest::expected1BasedUnclippedEnd;
69 bamRecordStruct EqualsTest::expectedRecord;
70 
71 void EqualsTest::testEq(FileType inputType)
72 {
73  reset();
74  SamFile inSam;
75 
76  std::string outputBase = "results/out";
77 
78  if(inputType == SAM)
79  {
80  assert(inSam.OpenForRead("testFiles/testEq.sam"));
81  outputBase += "SamEq";
82  }
83  else
84  {
85  assert(inSam.OpenForRead("testFiles/testEq.bam"));
86  outputBase += "BamEq";
87  }
88 
89  // Read the SAM Header.
90  SamFileHeader samHeader;
91  assert(inSam.ReadHeader(samHeader));
92 
93  std::string outputName = outputBase + "Bases.sam";
94  SamFile outBasesSam( outputName.c_str(), SamFile::WRITE);
95  outputName = outputBase + "Equals.sam";
96  SamFile outEqualsSam(outputName.c_str(), SamFile::WRITE);
97  outputName = outputBase + "Orig.sam";
98  SamFile outOrigSam( outputName.c_str(), SamFile::WRITE);
99  outputName = outputBase + "Bases.bam";
100  SamFile outBasesBam( outputName.c_str(), SamFile::WRITE);
101  outputName = outputBase + "Equals.bam";
102  SamFile outEqualsBam(outputName.c_str(), SamFile::WRITE);
103  outputName = outputBase + "Orig.bam";
104  SamFile outOrigBam( outputName.c_str(), SamFile::WRITE);
105  assert(outBasesSam.WriteHeader(samHeader));
106  assert(outEqualsSam.WriteHeader(samHeader));
107  assert(outOrigSam.WriteHeader(samHeader));
108  assert(outBasesBam.WriteHeader(samHeader));
109  assert(outEqualsBam.WriteHeader(samHeader));
110  assert(outOrigBam.WriteHeader(samHeader));
111 
112  outBasesSam.SetWriteSequenceTranslation(SamRecord::BASES);
113  outEqualsSam.SetWriteSequenceTranslation(SamRecord::EQUAL);
114  outOrigSam.SetWriteSequenceTranslation(SamRecord::NONE);
115  outBasesBam.SetWriteSequenceTranslation(SamRecord::BASES);
116  outEqualsBam.SetWriteSequenceTranslation(SamRecord::EQUAL);
117  outOrigBam.SetWriteSequenceTranslation(SamRecord::NONE);
118 
119  GenomeSequence reference("testFiles/chr1_partial.fa");
120 
121  inSam.SetReference(&reference);
122 
123  SamRecord samRecord;
124 
125  // The set of 16 variations are repeated 3 times: once with all charcters
126  // 1) Matches have the actual bases in them.
127  // 2) Matches have '='
128  // 3) Matches are mixed between bases and '='
129  // Since Sequences are 4 characters long, there are 16 variations
130  // of match/mismatch.
131  for(int j = 0; j < 16; j++)
132  {
133  assert(inSam.ReadRecord(samHeader, samRecord) == true);
134  validateEqRead(samRecord, j, READ_SEQS_BASES[j]);
135  assert(outBasesSam.WriteRecord(samHeader, samRecord));
136  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
137  assert(outOrigSam.WriteRecord(samHeader, samRecord));
138  assert(outBasesBam.WriteRecord(samHeader, samRecord));
139  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
140  assert(outOrigBam.WriteRecord(samHeader, samRecord));
141  }
142  for(int j = 0; j < 16; j++)
143  {
144  assert(inSam.ReadRecord(samHeader, samRecord) == true);
145  validateEqRead(samRecord, j, READ_SEQS_EQUALS[j]);
146  assert(outBasesSam.WriteRecord(samHeader, samRecord));
147  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
148  assert(outOrigSam.WriteRecord(samHeader, samRecord));
149  assert(outBasesBam.WriteRecord(samHeader, samRecord));
150  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
151  assert(outOrigBam.WriteRecord(samHeader, samRecord));
152  }
153  for(int j = 0; j < 16; j++)
154  {
155  assert(inSam.ReadRecord(samHeader, samRecord) == true);
156  validateEqRead(samRecord, j, READ_SEQS_MIXED[j]);
157  assert(outBasesSam.WriteRecord(samHeader, samRecord));
158  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
159  assert(outOrigSam.WriteRecord(samHeader, samRecord));
160  assert(outBasesBam.WriteRecord(samHeader, samRecord));
161  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
162  assert(outOrigBam.WriteRecord(samHeader, samRecord));
163  }
164 
165  expectedCigar = "2S4M";
166  expectedCigarHex.clear();
167  expectedCigarHex.push_back(0x24);
168  expectedCigarHex.push_back(0x40);
169  expected0BasedUnclippedStart = expectedRecord.myPosition-2;
170  expected1BasedUnclippedStart = expected0BasedUnclippedStart + 1;
171  expectedRecord.myBlockSize = 70;
172  expectedRecord.myReadNameLength = 21;
173  expectedRecord.myCigarLength = 2;
174  expectedRecord.myReadLength = 6;
175  expectedQuality = "??I00?";
176  assert(inSam.ReadRecord(samHeader, samRecord) == true);
177  validateEqRead(samRecord, 16, READ_SEQS_MIXED[16]);
178  assert(outBasesSam.WriteRecord(samHeader, samRecord));
179  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
180  assert(outOrigSam.WriteRecord(samHeader, samRecord));
181  assert(outBasesBam.WriteRecord(samHeader, samRecord));
182  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
183  assert(outOrigBam.WriteRecord(samHeader, samRecord));
184 
185  expectedCigar = "4M4H";
186  expectedCigarHex.clear();
187  expectedCigarHex.push_back(0x40);
188  expectedCigarHex.push_back(0x45);
189  expected0BasedUnclippedStart = expectedRecord.myPosition;
190  expected1BasedUnclippedStart = expected0BasedUnclippedStart + 1;
191  expected0BasedUnclippedEnd = expectedRecord.myPosition + 7;
192  expected1BasedUnclippedEnd = expected0BasedUnclippedEnd + 1;
193  expectedRecord.myBlockSize = 65;
194  expectedRecord.myReadNameLength = 19;
195  expectedRecord.myCigarLength = 2;
196  expectedRecord.myReadLength = 4;
197  expectedQuality = "I00?";
198  assert(inSam.ReadRecord(samHeader, samRecord) == true);
199  validateEqRead(samRecord, 17, READ_SEQS_MIXED[17]);
200  assert(outBasesSam.WriteRecord(samHeader, samRecord));
201  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
202  assert(outOrigSam.WriteRecord(samHeader, samRecord));
203  assert(outBasesBam.WriteRecord(samHeader, samRecord));
204  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
205  assert(outOrigBam.WriteRecord(samHeader, samRecord));
206 
207  expectedCigar = "1M1P1M1I1M3D1M";
208  expectedCigarHex.clear();
209  expectedCigarHex.push_back(0x10);
210  expectedCigarHex.push_back(0x16);
211  expectedCigarHex.push_back(0x10);
212  expectedCigarHex.push_back(0x11);
213  expectedCigarHex.push_back(0x10);
214  expectedCigarHex.push_back(0x32);
215  expectedCigarHex.push_back(0x10);
216  expected0BasedAlignmentEnd = expectedRecord.myPosition + 6;
217  expected1BasedAlignmentEnd = expected0BasedAlignmentEnd + 1;
218  expectedAlignmentLength = 7;
219  expected0BasedUnclippedStart = expectedRecord.myPosition;
220  expected1BasedUnclippedStart = expected0BasedUnclippedStart + 1;
221  expected0BasedUnclippedEnd = expected0BasedAlignmentEnd;
222  expected1BasedUnclippedEnd = expected0BasedUnclippedEnd + 1;
223  expectedRecord.myBlockSize = 95;
224  expectedRecord.myReadNameLength = 27;
225  expectedRecord.myCigarLength = 7;
226  expectedRecord.myReadLength = 5;
227  expectedQuality = "I00??";
228  assert(inSam.ReadRecord(samHeader, samRecord) == true);
229  validateEqRead(samRecord, 18, READ_SEQS_MIXED[18]);
230  assert(outBasesSam.WriteRecord(samHeader, samRecord));
231  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
232  assert(outOrigSam.WriteRecord(samHeader, samRecord));
233  assert(outBasesBam.WriteRecord(samHeader, samRecord));
234  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
235  assert(outOrigBam.WriteRecord(samHeader, samRecord));
236 
237  expectedCigar = "2M2N2M";
238  expectedCigarHex.clear();
239  expectedCigarHex.push_back(0x20);
240  expectedCigarHex.push_back(0x23);
241  expectedCigarHex.push_back(0x20);
242  expected0BasedAlignmentEnd = expectedRecord.myPosition + 5;
243  expected1BasedAlignmentEnd = expected0BasedAlignmentEnd + 1;
244  expectedAlignmentLength = 6;
245  expected0BasedUnclippedStart = expectedRecord.myPosition;
246  expected1BasedUnclippedStart = expected0BasedUnclippedStart + 1;
247  expected0BasedUnclippedEnd = expected0BasedAlignmentEnd;
248  expected1BasedUnclippedEnd = expected0BasedUnclippedEnd + 1;
249  expectedRecord.myBlockSize = 74;
250  expectedRecord.myReadNameLength = 24;
251  expectedRecord.myCigarLength = 3;
252  expectedRecord.myReadLength = 4;
253  expectedQuality = "I00?";
254  assert(inSam.ReadRecord(samHeader, samRecord) == true);
255  validateEqRead(samRecord, 19, READ_SEQS_MIXED[19]);
256  assert(outBasesSam.WriteRecord(samHeader, samRecord));
257  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
258  assert(outOrigSam.WriteRecord(samHeader, samRecord));
259  assert(outBasesBam.WriteRecord(samHeader, samRecord));
260  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
261  assert(outOrigBam.WriteRecord(samHeader, samRecord));
262 
263  // Test getNextMatchMismatch.
264  SamSingleBaseMatchInfo matchTest;
265  SamQuerySeqWithRefIter queryIter(samRecord, reference, true);
266  assert(queryIter.getNextMatchMismatch(matchTest) == true);
267  assert(matchTest.getType() == SamSingleBaseMatchInfo::MATCH);
268  assert(matchTest.getQueryIndex() == 0);
269  assert(queryIter.getNextMatchMismatch(matchTest) == true);
270  assert(matchTest.getType() == SamSingleBaseMatchInfo::MATCH);
271  assert(matchTest.getQueryIndex() == 1);
272  assert(queryIter.getNextMatchMismatch(matchTest) == true);
273  assert(matchTest.getType() == SamSingleBaseMatchInfo::MATCH);
274  assert(matchTest.getQueryIndex() == 2);
275  assert(queryIter.getNextMatchMismatch(matchTest) == true);
276  assert(matchTest.getType() == SamSingleBaseMatchInfo::MATCH);
277  assert(matchTest.getQueryIndex() == 3);
278  assert(queryIter.getNextMatchMismatch(matchTest) == false);
279 
280  // Check the read that is on a different chormosome not
281  // found in the reference.
282  reset();
283  expectedRecord.myBlockSize = 56;
284  expectedRecord.myReadNameLength = 14;
285  expectedRecord.myReferenceID = 1;
286  expectedReferenceName = "2";
287  expectedRecord.myMateReferenceID = 1;
288  expectedMateReferenceName = "2";
289 
290  assert(inSam.ReadRecord(samHeader, samRecord) == true);
291  validateEqRead(samRecord, 20, READ_SEQS_MIXED[20]);
292  assert(outBasesSam.WriteRecord(samHeader, samRecord));
293  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
294  assert(outOrigSam.WriteRecord(samHeader, samRecord));
295  assert(outBasesBam.WriteRecord(samHeader, samRecord));
296  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
297  assert(outOrigBam.WriteRecord(samHeader, samRecord));
298 
299  // Check the read that is on a different chormosome and
300  // has '=', but the chromosome is not found in the reference.
301  reset();
302  expectedRecord.myBlockSize = 57;
303  expectedRecord.myReadNameLength = 15;
304  expectedRecord.myReferenceID = 1;
305  expectedReferenceName = "2";
306  expectedRecord.myMateReferenceID = 1;
307  expectedMateReferenceName = "2";
308 
309  assert(inSam.ReadRecord(samHeader, samRecord) == true);
310  validateEqRead(samRecord, 21, READ_SEQS_MIXED[21]);
311  assert(outBasesSam.WriteRecord(samHeader, samRecord));
312  assert(outEqualsSam.WriteRecord(samHeader, samRecord));
313  assert(outOrigSam.WriteRecord(samHeader, samRecord));
314  assert(outBasesBam.WriteRecord(samHeader, samRecord));
315  assert(outEqualsBam.WriteRecord(samHeader, samRecord));
316  assert(outOrigBam.WriteRecord(samHeader, samRecord));
317 
318  SamQuerySeqWithRefIter queryIter2(samRecord, reference, true);
319  assert(queryIter2.getNextMatchMismatch(matchTest) == false);
320 }
321 
322 
323 void EqualsTest::reset()
324 {
325  expectedReferenceName = "1";
326  expectedMateReferenceName = "1";
327  expectedMateReferenceNameOrEqual = "=";
328  expectedCigar = "4M";
329  expectedQuality = "I00?";
330 
331  // The First cigar is 4M which is 4 << 4 | 0 = 0x40 = 64
332  expectedCigarHex.clear();
333  expectedCigarHex.push_back(0x40);
334 
335  expectedRecord.myBlockSize = 50;
336  expectedRecord.myReferenceID = 0;
337  expectedRecord.myPosition = 10010;
338  expectedRecord.myReadNameLength = 8;
339  expectedRecord.myMapQuality = 0;
340  expectedRecord.myBin = 4681;
341  expectedRecord.myCigarLength = 1;
342  expectedRecord.myFlag = 73;
343  expectedRecord.myReadLength = 4;
344  expectedRecord.myMateReferenceID = 0;
345  expectedRecord.myMatePosition = 10008;
346  expectedRecord.myInsertSize = 0;
347 
348  expected0BasedAlignmentEnd = 10013;
349  expected1BasedAlignmentEnd = expected0BasedAlignmentEnd + 1;
350  expectedAlignmentLength = 4;
351  expected0BasedUnclippedStart = expectedRecord.myPosition;
352  expected1BasedUnclippedStart = expected0BasedUnclippedStart + 1;
353  expected0BasedUnclippedEnd = expected0BasedAlignmentEnd;
354  expected1BasedUnclippedEnd = expected1BasedAlignmentEnd;
355 }
356 
357 void EqualsTest::validateEqRead(SamRecord& samRecord,
358  int readIndex,
359  const char* actualExpectedSequence)
360 {
361  char tag[3];
362  char type;
363  void* value;
364 
365  //////////////////////////////////////////
366  // Validate Record 1
367  // Check the alignment end
368  assert(samRecord.get0BasedAlignmentEnd() == expected0BasedAlignmentEnd);
369  assert(samRecord.get1BasedAlignmentEnd() == expected1BasedAlignmentEnd);
370  assert(samRecord.getAlignmentLength() == expectedAlignmentLength);
371  assert(samRecord.get0BasedUnclippedStart() == expected0BasedUnclippedStart);
372  assert(samRecord.get1BasedUnclippedStart() == expected1BasedUnclippedStart);
373  assert(samRecord.get0BasedUnclippedEnd() == expected0BasedUnclippedEnd);
374  assert(samRecord.get1BasedUnclippedEnd() == expected1BasedUnclippedEnd);
375 
376  // Check the accessors.
377  assert(samRecord.getBlockSize() == expectedRecord.myBlockSize);
378  assert(samRecord.getReferenceID() == expectedRecord.myReferenceID);
379  assert(strcmp(samRecord.getReferenceName(), expectedReferenceName) == 0);
380  assert(samRecord.get1BasedPosition() == expectedRecord.myPosition + 1);
381  assert(samRecord.get0BasedPosition() == expectedRecord.myPosition);
382  assert(samRecord.getReadNameLength() ==
383  expectedRecord.myReadNameLength);
384  assert(samRecord.getMapQuality() == expectedRecord.myMapQuality);
385  assert(samRecord.getBin() == expectedRecord.myBin);
386  assert(samRecord.getCigarLength() == expectedRecord.myCigarLength);
387  assert(samRecord.getFlag() == expectedRecord.myFlag);
388  assert(samRecord.getReadLength() == expectedRecord.myReadLength);
389  assert(samRecord.getMateReferenceID() ==
390  expectedRecord.myMateReferenceID);
391  assert(strcmp(samRecord.getMateReferenceName(),
392  expectedMateReferenceName) == 0);
393  assert(strcmp(samRecord.getMateReferenceNameOrEqual(),
394  expectedMateReferenceNameOrEqual) == 0);
395  assert(samRecord.get1BasedMatePosition() ==
396  expectedRecord.myMatePosition + 1);
397  assert(samRecord.get0BasedMatePosition() ==
398  expectedRecord.myMatePosition);
399  assert(samRecord.getInsertSize() == expectedRecord.myInsertSize);
400  assert(strcmp(samRecord.getReadName(), READ_NAMES[readIndex]) == 0);
401  assert(strcmp(samRecord.getCigar(), expectedCigar) == 0);
403  assert(strcmp(samRecord.getSequence(), READ_SEQS_BASES[readIndex]) == 0);
404  assert(strcmp(samRecord.getQuality(), expectedQuality) == 0);
405 
406  assert(samRecord.getSequence(0) == READ_SEQS_BASES[readIndex][0]);
407  assert(samRecord.getQuality(0) == expectedQuality[0]);
408  assert(samRecord.getSequence(1)== READ_SEQS_BASES[readIndex][1]);
409  assert(samRecord.getQuality(1) == expectedQuality[1]);
410  assert(samRecord.getSequence(2) == READ_SEQS_BASES[readIndex][2]);
411  assert(samRecord.getQuality(2) == expectedQuality[2]);
412  assert(samRecord.getSequence(3) == READ_SEQS_BASES[readIndex][3]);
413  assert(samRecord.getQuality(3) == expectedQuality[3]);
414 
415  assert(strcmp(samRecord.getSequence(SamRecord::EQUAL),
416  READ_SEQS_EQUALS[readIndex]) == 0);
417  assert(samRecord.getSequence(0, SamRecord::EQUAL) ==
418  READ_SEQS_EQUALS[readIndex][0]);
419  assert(samRecord.getQuality(0) == expectedQuality[0]);
420  assert(samRecord.getSequence(1, SamRecord::EQUAL) ==
421  READ_SEQS_EQUALS[readIndex][1]);
422  assert(samRecord.getQuality(1) == expectedQuality[1]);
423  assert(samRecord.getSequence(2, SamRecord::EQUAL) ==
424  READ_SEQS_EQUALS[readIndex][2]);
425  assert(samRecord.getQuality(2) == expectedQuality[2]);
426  assert(samRecord.getSequence(3, SamRecord::EQUAL) ==
427  READ_SEQS_EQUALS[readIndex][3]);
428  assert(samRecord.getQuality(3) == expectedQuality[3]);
429 
430  assert(strcmp(samRecord.getSequence(SamRecord::NONE),
431  actualExpectedSequence) == 0);
432  assert(samRecord.getSequence(0, SamRecord::NONE) ==
433  actualExpectedSequence[0]);
434  assert(samRecord.getQuality(0) == expectedQuality[0]);
435  assert(samRecord.getSequence(1, SamRecord::NONE) ==
436  actualExpectedSequence[1]);
437  assert(samRecord.getQuality(1) == expectedQuality[1]);
438  assert(samRecord.getSequence(2, SamRecord::NONE) ==
439  actualExpectedSequence[2]);
440  assert(samRecord.getQuality(2) == expectedQuality[2]);
441  assert(samRecord.getSequence(3, SamRecord::NONE) ==
442  actualExpectedSequence[3]);
443  assert(samRecord.getQuality(3) == expectedQuality[3]);
444 
446  assert(strcmp(samRecord.getSequence(),
447  actualExpectedSequence) == 0);
448  assert(samRecord.getSequence(0) ==
449  actualExpectedSequence[0]);
450  assert(samRecord.getQuality(0) == expectedQuality[0]);
451  assert(samRecord.getSequence(1) ==
452  actualExpectedSequence[1]);
453  assert(samRecord.getQuality(1) == expectedQuality[1]);
454  assert(samRecord.getSequence(2) ==
455  actualExpectedSequence[2]);
456  assert(samRecord.getQuality(2) == expectedQuality[2]);
457  assert(samRecord.getSequence(3) ==
458  actualExpectedSequence[3]);
459  assert(samRecord.getQuality(3) == expectedQuality[3]);
460 
461  // No tags, should return false.
462 
463  assert(samRecord.getNextSamTag(tag, type, &value) == false);
464 
465  // Get the record ptr.
467  validateEqReadBuffer(samRecord, READ_SEQS_BASES[readIndex]);
469  validateEqReadBuffer(samRecord, actualExpectedSequence);
471  validateEqReadBuffer(samRecord, READ_SEQS_EQUALS[readIndex]);
472 }
473 
474 
475 void EqualsTest::validateEqReadBuffer(SamRecord& samRecord,
476  const char* expectedSequence)
477 {
478  const bamRecordStruct* bufferPtr;
479  unsigned char* varPtr;
480 
481  bufferPtr = (const bamRecordStruct*)samRecord.getRecordBuffer();
482  // Validate the buffers match.
483  assert(bufferPtr->myBlockSize == expectedRecord.myBlockSize);
484  assert(bufferPtr->myReferenceID == expectedRecord.myReferenceID);
485  assert(bufferPtr->myPosition == expectedRecord.myPosition);
486  assert(bufferPtr->myReadNameLength == expectedRecord.myReadNameLength);
487  assert(bufferPtr->myMapQuality == expectedRecord.myMapQuality);
488  assert(bufferPtr->myBin == expectedRecord.myBin);
489  assert(bufferPtr->myCigarLength == expectedRecord.myCigarLength);
490  assert(bufferPtr->myFlag == expectedRecord.myFlag);
491  assert(bufferPtr->myReadLength == expectedRecord.myReadLength);
492  assert(bufferPtr->myMateReferenceID ==
493  expectedRecord.myMateReferenceID);
494  assert(bufferPtr->myMatePosition == expectedRecord.myMatePosition);
495  assert(bufferPtr->myInsertSize == expectedRecord.myInsertSize);
496 
497  // Validate the variable length fields in the buffer.
498  // Set the pointer to the start of the variable fields.
499  varPtr = (unsigned char*)(&(bufferPtr->myData[0]));
500 
501  // Validate the readname.
502  for(int i = 0; i < expectedRecord.myReadNameLength; i++)
503  {
504  assert(*varPtr == samRecord.getReadName()[i]);
505  varPtr++;
506  }
507 
508  // Validate the cigar.
509  for(int i = 0; i < expectedRecord.myCigarLength; i++)
510  {
511  assert(*(unsigned int*)varPtr == expectedCigarHex[i]);
512  // Increment the varptr the size of an int.
513  varPtr += 4;
514  }
515 
516  // Validate the sequence.
517  int expectedSeqHex = 0;
518  for(int i = 0; i < expectedRecord.myReadLength; i++)
519  {
520  int hexChar = 0x0;
521  switch(expectedSequence[i])
522  {
523  case '=':
524  hexChar = 0x0;
525  break;
526  case 'A':
527  case 'a':
528  hexChar = 0x1;
529  break;
530  case 'C':
531  case 'c':
532  hexChar = 0x2;
533  break;
534  case 'G':
535  case 'g':
536  hexChar = 0x4;
537  break;
538  case 'T':
539  case 't':
540  hexChar = 0x8;
541  break;
542  case 'N':
543  case 'n':
544  hexChar = 0xF;
545  break;
546  }
547  if(i%2 == 0)
548  {
549  expectedSeqHex = hexChar << 4;
550  }
551  else
552  {
553  expectedSeqHex |= hexChar;
554  assert(*varPtr == expectedSeqHex);
555  varPtr++;
556  }
557  }
558  if((expectedRecord.myReadLength%2) != 0)
559  {
560  // Odd number of sequences, so need to check the last one.
561  assert(*varPtr == expectedSeqHex);
562  varPtr++;
563  }
564 
565  // Validate the Quality
566  for(int i = 0; i < expectedRecord.myReadLength; i++)
567  {
568  assert(*varPtr == samRecord.getQuality()[i] - 33);
569  varPtr++;
570  }
571 
572 }
573 
574 
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:35
Allows the user to easily read/write a SAM/BAM file.
Definition: SamFile.h:36
bool ReadHeader(SamFileHeader &header)
Reads the header section from the file and stores it in the passed in header.
Definition: SamFile.cpp:450
bool ReadRecord(SamFileHeader &header, SamRecord &record)
Reads the next record from the file & stores it in the passed in record.
Definition: SamFile.cpp:514
void SetReference(GenomeSequence *reference)
Sets the reference to the specified genome sequence object.
Definition: SamFile.cpp:380
@ WRITE
open for writing.
Definition: SamFile.h:41
bool OpenForRead(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for reading with the specified filename, determing the type of file and SAM/BAM b...
Definition: SamFile.cpp:93
Iterates through the query and compare with reference.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52
int32_t getBlockSize()
Get the block size of the record (BAM format).
Definition: SamRecord.cpp:1281
uint16_t getCigarLength()
Get the length of the BAM formatted CIGAR.
Definition: SamRecord.cpp:1362
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
Definition: SamRecord.cpp:1298
@ NONE
Leave the sequence as is.
Definition: SamRecord.h:58
@ BASES
Translate '=' to the actual base.
Definition: SamRecord.h:60
@ EQUAL
Translate bases that match the reference to '='.
Definition: SamRecord.h:59
int32_t getInsertSize()
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
Definition: SamRecord.cpp:1459
int32_t get0BasedMatePosition()
Get the 0-based(BAM) leftmost mate/next fragment's position.
Definition: SamRecord.cpp:1452
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
Definition: SamRecord.cpp:1312
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
Definition: SamRecord.cpp:1305
int32_t getAlignmentLength()
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
Definition: SamRecord.cpp:1493
int32_t get1BasedAlignmentEnd()
Returns the 1-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1486
uint8_t getReadNameLength()
Get the length of the readname (QNAME) including the null.
Definition: SamRecord.cpp:1326
const char * getMateReferenceNameOrEqual()
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the ...
Definition: SamRecord.cpp:1420
int32_t get1BasedUnclippedStart()
Returns the 1-based inclusive left-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1519
uint16_t getBin()
Get the BAM bin for the record.
Definition: SamRecord.cpp:1347
int32_t getMateReferenceID()
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
Definition: SamRecord.cpp:1438
uint16_t getFlag()
Get the flag (FLAG).
Definition: SamRecord.cpp:1384
const void * getRecordBuffer()
Get a const pointer to the buffer that contains the BAM representation of the record.
Definition: SamRecord.cpp:1204
void setSequenceTranslation(SequenceTranslation translation)
Set the type of sequence translation to use when getting the sequence.
Definition: SamRecord.cpp:187
int32_t get1BasedMatePosition()
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
Definition: SamRecord.cpp:1445
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1526
int32_t get1BasedUnclippedEnd()
Returns the 1-based inclusive right-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1535
const char * getMateReferenceName()
Get the mate/next fragment's reference sequence name (RNEXT).
Definition: SamRecord.cpp:1410
bool getNextSamTag(char *tag, char &vtype, void **value)
Get the next tag from the record.
Definition: SamRecord.cpp:1962
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1506
int32_t getReadLength()
Get the length of the read.
Definition: SamRecord.cpp:1391
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1467
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
Definition: SamRecord.cpp:1319
const char * getCigar()
Returns the SAM formatted CIGAR string.
Definition: SamRecord.cpp:1555
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
Definition: SamRecord.cpp:1340
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Definition: SamRecord.cpp:1542
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
Definition: SamRecord.cpp:1638
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...
Definition: SamRecord.cpp:1568
This class contains the match/mismatch information between the reference and a read for a single base...
int32_t getQueryIndex()
Get the query index for this object.
Type getType()
Get the type (match/mismatch/unknown) for this object.
Structure of a BAM record.
Definition: SamRecord.h:34