libpappsomspp
Library for mass spectrometry
aamodification.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/amino_acid/aamodification.h
3  * \date 7/3/2015
4  * \author Olivier Langella
5  * \brief amino acid modification model
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include <QRegExp>
32 #include <QDebug>
33 #include <cmath>
34 
35 #include "aamodification.h"
36 #include "aa.h"
37 #include "../pappsoexception.h"
38 #include "../mzrange.h"
39 #include "../peptide/peptide.h"
40 #include "../obo/filterobopsimodsink.h"
41 #include "../obo/filterobopsimodtermaccession.h"
42 #include "../exception/exceptionnotfound.h"
43 
44 /*
45 
46 inline void initMyResource() {
47  Q_INIT_RESOURCE(resources);
48 }
49 */
50 
51 namespace pappso
52 {
53 
55 
56 AaModification::AaModification(const QString &accession, pappso_double mass)
57  : m_accession(accession), m_mass(mass)
58 {
64 
65  m_mapIsotope = {{Isotope::C13, 0},
66  {Isotope::H2, 0},
67  {Isotope::N15, 0},
68  {Isotope::O17, 0},
69  {Isotope::O18, 0},
70  {Isotope::S33, 0},
71  {Isotope::S34, 0},
72  {Isotope::S36, 0}};
73 }
74 
75 
76 AaModification::AaModification(AaModification &&toCopy) // move constructor
77  : m_accession(toCopy.m_accession),
78  m_name(toCopy.m_name),
79  m_mass(toCopy.m_mass),
80  m_atomCount(std::move(toCopy.m_atomCount)),
81  m_mapIsotope(toCopy.m_mapIsotope)
82 {
83  m_origin = toCopy.m_origin;
84 }
85 
87 {
88 }
89 
90 const QString &
92 {
93 
94  qDebug();
95  return m_accession;
96 }
97 const QString &
99 {
100  return m_name;
101 }
104  MapAccessionModifications ret;
105 
106  return ret;
107  }();
108 
111 {
112  AaModification *new_mod;
113  // qDebug() << " AaModification::createInstance begin";
114  new_mod = new AaModification(term.m_accession, term.m_diffMono);
115  // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
116  new_mod->setDiffFormula(term.m_diffFormula);
117  new_mod->setXrefOrigin(term.m_origin);
118  new_mod->m_name = term.m_name;
119  return new_mod;
120 }
121 
123 AaModification::createInstance(const QString &accession)
124 {
125  if(accession == "internal:Nter_hydrolytic_cleavage_H")
126  {
127  OboPsiModTerm term;
128  term.m_accession = accession;
129  term.m_diffFormula = "H 1";
130  term.m_diffMono = MPROTIUM;
131  term.m_name = "Nter hydrolytic cleavage H+";
132  return (AaModification::createInstance(term));
133  }
134  if(accession == "internal:Cter_hydrolytic_cleavage_HO")
135  {
136  OboPsiModTerm term;
137  term.m_accession = accession;
138  term.m_diffFormula = "H 1 O 1";
139  term.m_diffMono = MPROTIUM + MASSOXYGEN;
140  term.m_name = "Cter hydrolytic cleavage HO";
141  return (AaModification::createInstance(term));
142  }
143  if(accession.startsWith("MUTATION:"))
144  {
145  QRegExp regexp_mutation("^MUTATION:([A-Z])=>([A-Z])$");
146  if(regexp_mutation.exactMatch(accession))
147  {
148  qDebug() << regexp_mutation.capturedTexts()[1].at(0) << " "
149  << regexp_mutation.capturedTexts()[2].at(0);
150 
151  Aa aa_from(
152  regexp_mutation.capturedTexts()[1].toStdString().c_str()[0]);
153  Aa aa_to(regexp_mutation.capturedTexts()[2].toStdString().c_str()[0]);
154  AaModificationP instance_mutation =
155  createInstanceMutation(aa_from, aa_to);
156  return instance_mutation;
157  // m_psiModLabel<<"|";
158  }
159  }
160  // initMyResource();
161  FilterOboPsiModSink term_list;
162  FilterOboPsiModTermAccession filterm_accession(term_list, accession);
163 
164  OboPsiMod psimod(filterm_accession);
165 
166  try
167  {
168  return (AaModification::createInstance(term_list.getOne()));
169  }
170  catch(ExceptionNotFound &e)
171  {
172  throw ExceptionNotFound(QObject::tr("modification not found : [%1]\n%2")
173  .arg(accession)
174  .arg(e.qwhat()));
175  }
176 }
177 
178 void
179 AaModification::setXrefOrigin(const QString &origin)
180 {
181  // xref: Origin: "N"
182  // xref: Origin: "X"
183  m_origin = origin;
184 }
185 void
186 AaModification::setDiffFormula(const QString &diff_formula)
187 {
188  QRegExp rx("(^|\\s)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
189  int pos = 0;
190 
191  while((pos = rx.indexIn(diff_formula, pos)) != -1)
192  {
193  qDebug() << rx.cap(2) << " " << rx.cap(3);
194  if(rx.cap(2) == "C")
195  {
196  m_atomCount[AtomIsotopeSurvey::C] = rx.cap(3).toInt();
197  }
198  else if(rx.cap(2) == "H")
199  {
200  m_atomCount[AtomIsotopeSurvey::H] = rx.cap(3).toInt();
201  }
202  else if(rx.cap(2) == "N")
203  {
204  m_atomCount[AtomIsotopeSurvey::N] = rx.cap(3).toInt();
205  }
206  else if(rx.cap(2) == "O")
207  {
208  m_atomCount[AtomIsotopeSurvey::O] = rx.cap(3).toInt();
209  }
210  else if(rx.cap(2) == "S")
211  {
212  m_atomCount[AtomIsotopeSurvey::S] = rx.cap(3).toInt();
213  }
214  pos += rx.matchedLength();
215  }
216 
217  // look for isotopes :
218  rx.setPattern("\\(([-]{0,1}\\d+)\\)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
219  pos = 0;
220 
221  while((pos = rx.indexIn(diff_formula, pos)) != -1)
222  {
223  qDebug() << rx.cap(1) << " " << rx.cap(2) << " " << rx.cap(3);
224  int number_of_isotopes = rx.cap(3).toInt();
225  if(rx.cap(2) == "C")
226  {
227  if(rx.cap(1) == "13")
228  {
229  m_mapIsotope.at(Isotope::C13) = number_of_isotopes;
230  }
231  m_atomCount[AtomIsotopeSurvey::C] += number_of_isotopes;
232  }
233  else if(rx.cap(2) == "H")
234  {
235  if(rx.cap(1) == "2")
236  {
237  m_mapIsotope.at(Isotope::H2) = number_of_isotopes;
238  }
239  m_atomCount[AtomIsotopeSurvey::H] += number_of_isotopes;
240  }
241  else if(rx.cap(2) == "N")
242  {
243  if(rx.cap(1) == "15")
244  {
245  m_mapIsotope.at(Isotope::N15) = number_of_isotopes;
246  }
247  m_atomCount[AtomIsotopeSurvey::N] += number_of_isotopes;
248  }
249  else if(rx.cap(2) == "O")
250  {
251  if(rx.cap(1) == "17")
252  {
253  m_mapIsotope.at(Isotope::O17) = number_of_isotopes;
254  }
255  else if(rx.cap(1) == "18")
256  {
257  m_mapIsotope.at(Isotope::O18) = number_of_isotopes;
258  }
259  m_atomCount[AtomIsotopeSurvey::O] += number_of_isotopes;
260  }
261  else if(rx.cap(2) == "S")
262  {
263  if(rx.cap(1) == "33")
264  {
265  m_mapIsotope.at(Isotope::S33) = number_of_isotopes;
266  }
267  else if(rx.cap(1) == "34")
268  {
269  m_mapIsotope.at(Isotope::S34) = number_of_isotopes;
270  }
271  else if(rx.cap(1) == "36")
272  {
273  m_mapIsotope.at(Isotope::S36) = number_of_isotopes;
274  }
275  m_atomCount[AtomIsotopeSurvey::S] += number_of_isotopes;
276  }
277  pos += rx.matchedLength();
278  }
279 
280 
282 }
283 
284 
285 void
287 {
288  pappso_double theoreticalm_mass = 0;
289  std::map<AtomIsotopeSurvey, int>::const_iterator it_atom =
291  if(it_atom != m_atomCount.end())
292  {
293  theoreticalm_mass += MASSCARBON * (it_atom->second);
294  }
295  it_atom = m_atomCount.find(AtomIsotopeSurvey::H);
296  if(it_atom != m_atomCount.end())
297  {
298  theoreticalm_mass += MPROTIUM * (it_atom->second);
299  }
300 
301  it_atom = m_atomCount.find(AtomIsotopeSurvey::O);
302  if(it_atom != m_atomCount.end())
303  {
304  theoreticalm_mass += MASSOXYGEN * (it_atom->second);
305  }
306 
307  it_atom = m_atomCount.find(AtomIsotopeSurvey::N);
308  if(it_atom != m_atomCount.end())
309  {
310  theoreticalm_mass += MASSNITROGEN * (it_atom->second);
311  }
312  it_atom = m_atomCount.find(AtomIsotopeSurvey::S);
313  if(it_atom != m_atomCount.end())
314  {
315  theoreticalm_mass += MASSSULFUR * (it_atom->second);
316  }
317 
318  qDebug() << theoreticalm_mass;
319 
320  theoreticalm_mass += DIFFC12C13 * m_mapIsotope.at(Isotope::C13);
321  theoreticalm_mass += DIFFH1H2 * m_mapIsotope.at(Isotope::H2);
322  theoreticalm_mass += DIFFN14N15 * m_mapIsotope.at(Isotope::N15);
323  theoreticalm_mass += DIFFO16O17 * m_mapIsotope.at(Isotope::O17);
324  theoreticalm_mass += DIFFO16O18 * m_mapIsotope.at(Isotope::O18);
325  theoreticalm_mass += DIFFS32S33 * m_mapIsotope.at(Isotope::S33);
326  theoreticalm_mass += DIFFS32S34 * m_mapIsotope.at(Isotope::S34);
327  theoreticalm_mass += DIFFS32S36 * m_mapIsotope.at(Isotope::S36);
328 
329 
330  pappso_double diff = std::fabs((pappso_double)m_mass - theoreticalm_mass);
331  if(diff < 0.001)
332  {
333  m_mass = theoreticalm_mass;
334  qDebug() << diff;
335  }
336  else
337  {
338  qDebug()
339  << "ERROR in AaModification::calculateMassFromChemicalComponents theo="
340  << theoreticalm_mass << " m=" << m_mass << " diff=" << diff
341  << " accession=" << m_accession;
342  }
343 }
344 
347 {
348  QString accession = QString("%1").arg(modificationMass);
349  qDebug() << accession;
350  QMutexLocker locker(&m_mutex);
351  if(m_mapAccessionModifications.find(accession) ==
353  {
354  // not found
355  m_mapAccessionModifications.insert(std::pair<QString, AaModification *>(
356  accession, new AaModification(accession, modificationMass)));
357  }
358  else
359  {
360  // found
361  }
362  return m_mapAccessionModifications.at(accession);
363 }
364 
366 AaModification::getInstance(const QString &accession)
367 {
368  try
369  {
370  QMutexLocker locker(&m_mutex);
371  MapAccessionModifications::iterator it =
372  m_mapAccessionModifications.find(accession);
373  if(it == m_mapAccessionModifications.end())
374  {
375 
376  // not found
377  std::pair<MapAccessionModifications::iterator, bool> insert_res =
379  std::pair<QString, AaModificationP>(
380  accession, AaModification::createInstance(accession)));
381  it = insert_res.first;
382  }
383  else
384  {
385  // found
386  }
387  return it->second;
388  }
389  catch(ExceptionNotFound &e)
390  {
391  throw ExceptionNotFound(
392  QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
393  .arg(accession)
394  .arg(e.qwhat()));
395  }
396  catch(PappsoException &e)
397  {
398  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
399  .arg(accession)
400  .arg(e.qwhat()));
401  }
402  catch(std::exception &e)
403  {
404  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
405  .arg(accession)
406  .arg(e.what()));
407  }
408 }
409 
412 {
413 
414  QMutexLocker locker(&m_mutex);
415  MapAccessionModifications::iterator it =
417  if(it == m_mapAccessionModifications.end())
418  {
419  // not found
420  std::pair<MapAccessionModifications::iterator, bool> insert_res =
421  m_mapAccessionModifications.insert(std::pair<QString, AaModificationP>(
422  oboterm.m_accession, AaModification::createInstance(oboterm)));
423  it = insert_res.first;
424  }
425  else
426  {
427  // found
428  }
429  return it->second;
430 }
431 
432 
435  pappso_double mass,
436  const PeptideSp &peptide_sp,
437  unsigned int position)
438 {
440  if(MzRange(mass, precision).contains(getInstance("MOD:00719")->getMass()))
441  {
442  if(type == "M")
443  {
444  return getInstance("MOD:00719");
445  }
446  if(type == "K")
447  {
448  return getInstance("MOD:01047");
449  }
450  }
451  // accession== "MOD:00057"
452  if(MzRange(mass, precision).contains(getInstance("MOD:00408")->getMass()))
453  {
454  // id: MOD:00394
455  // name: acetylated residue
456  // potential N-terminus modifications
457  if(position == 0)
458  {
459  return getInstance("MOD:00408");
460  }
461  }
462  if(MzRange(mass, precision).contains(getInstance("MOD:01160")->getMass()))
463  {
464  //-17.02655
465  // loss of ammonia [MOD:01160] -17.026549
466  return getInstance("MOD:01160");
467  }
468 
469  if(MzRange(mass, precision).contains(getInstance("MOD:01060")->getMass()))
470  {
471  //// iodoacetamide [MOD:00397] 57.021464
472  if(type == "C")
473  {
474  return getInstance("MOD:01060");
475  }
476  else
477  {
478  return getInstance("MOD:00397");
479  }
480  }
481  if(MzRange(mass, precision).contains(getInstance("MOD:00704")->getMass()))
482  {
483  // loss of water
484  /*
485  if (position == 0) {
486  if (peptide_sp.get()->getSequence().startsWith("EG")) {
487  return getInstance("MOD:00365");
488  }
489  if (peptide_sp.get()->getSequence().startsWith("ES")) {
490  return getInstance("MOD:00953");
491  }
492  if (type == "E") {
493  return getInstance("MOD:00420");
494  }
495  }
496  */
497  // dehydrated residue [MOD:00704] -18.010565
498  return getInstance("MOD:00704");
499  }
500  if(MzRange(mass, precision).contains(getInstance("MOD:00696")->getMass()))
501  {
502  // phosphorylated residue [MOD:00696] 79.966330
503  return getInstance("MOD:00696");
504  }
505  bool isCter = false;
506  if(peptide_sp.get()->size() == (position + 1))
507  {
508  isCter = true;
509  }
510  if((position == 0) || isCter)
511  {
512  if(MzRange(mass, precision).contains(getInstance("MOD:00429")->getMass()))
513  {
514  // dimethyl
515  return getInstance("MOD:00429");
516  }
517  if(MzRange(mass, precision).contains(getInstance("MOD:00552")->getMass()))
518  {
519  // 4x(2)H labeled dimethyl residue
520  return getInstance("MOD:00552");
521  }
522  if(MzRange(mass, precision).contains(getInstance("MOD:00638")->getMass()))
523  {
524  // 2x(13)C,6x(2)H-dimethylated arginine
525  return getInstance("MOD:00638");
526  }
527  }
528  throw PappsoException(
529  QObject::tr("tandem modification not found : %1 %2 %3 %4")
530  .arg(type)
531  .arg(mass)
532  .arg(peptide_sp.get()->getSequence())
533  .arg(position));
534 }
535 
538 {
539  return m_mass;
540 }
541 
542 
543 int
545 {
546  // qDebug() << "AaModification::getNumberOfAtom(AtomIsotopeSurvey atom) NOT
547  // IMPLEMENTED";
548  return m_atomCount.at(atom);
549 }
550 
551 
552 int
554 {
555  try
556  {
557  return m_mapIsotope.at(isotope);
558  }
559  catch(std::exception &e)
560  {
561  throw PappsoException(
562  QObject::tr("ERROR in AaModification::getNumberOfIsotope %2")
563  .arg(e.what()));
564  }
565 }
566 
567 
568 bool
570 {
571  if(m_accession.startsWith("internal:"))
572  {
573  return true;
574  }
575  return false;
576 }
577 
579 AaModification::createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
580 {
581  QString accession(
582  QString("MUTATION:%1=>%2").arg(aa_from.getLetter()).arg(aa_to.getLetter()));
583  double diffMono = aa_to.getMass() - aa_from.getMass();
584  // not found
585  AaModification *instance_mutation;
586  // qDebug() << " AaModification::createInstance begin";
587  instance_mutation = new AaModification(accession, diffMono);
588  // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
589 
590  for(std::int8_t atomInt = (std::int8_t)AtomIsotopeSurvey::C;
591  atomInt != (std::int8_t)AtomIsotopeSurvey::last;
592  atomInt++)
593  {
594  AtomIsotopeSurvey atom = static_cast<AtomIsotopeSurvey>(atomInt);
595  instance_mutation->m_atomCount[atom] =
596  aa_to.getNumberOfAtom(atom) - aa_from.getNumberOfAtom(atom);
597  }
598  instance_mutation->m_name = QString("mutation from %1 to %2")
599  .arg(aa_from.getLetter())
600  .arg(aa_to.getLetter());
601  return instance_mutation;
602 }
603 
604 
606 AaModification::getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
607 {
608  QString accession(QString("MUTATION:%1=>%2").arg(mut_from).arg(mut_to));
609  try
610  {
611  QMutexLocker locker(&m_mutex);
612  MapAccessionModifications::iterator it =
613  m_mapAccessionModifications.find(accession);
614  if(it == m_mapAccessionModifications.end())
615  {
616  Aa aa_from(mut_from.toLatin1());
617  Aa aa_to(mut_to.toLatin1());
618  AaModificationP instance_mutation =
619  createInstanceMutation(aa_from, aa_to);
620 
621  std::pair<MapAccessionModifications::iterator, bool> insert_res =
623  std::pair<QString, AaModificationP>(accession,
624  instance_mutation));
625  it = insert_res.first;
626  }
627  else
628  {
629  // found
630  }
631  return it->second;
632  }
633  catch(ExceptionNotFound &e)
634  {
635  throw ExceptionNotFound(
636  QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
637  .arg(accession)
638  .arg(e.qwhat()));
639  }
640  catch(PappsoException &e)
641  {
642  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
643  .arg(accession)
644  .arg(e.qwhat()));
645  }
646  catch(std::exception &e)
647  {
648  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
649  .arg(accession)
650  .arg(e.what()));
651  }
652 } // namespace pappso
653 
654 } // namespace pappso
amino acid modification model
virtual const char & getLetter() const
Definition: aabase.cpp:432
const QString & getName() const
static AaModificationP getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
get a fake modification coding a mutation from an amino acid to an other
static AaModificationP createInstance(const QString &saccession)
std::map< Isotope, int > m_mapIsotope
const QString & getAccession() const
static AaModificationP getInstanceXtandemMod(const QString &type, pappso_double mass, const PeptideSp &peptide_sp, unsigned int position)
AaModification(AaModification &&toCopy)
std::map< AtomIsotopeSurvey, int > m_atomCount
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
pappso_double getMass() const
void setXrefOrigin(const QString &origin)
set list of amino acid on which this modification takes place
std::map< QString, AaModificationP > MapAccessionModifications
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
const QString m_accession
void setDiffFormula(const QString &diff_formula)
static AaModificationP createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
void calculateMassFromChemicalComponents()
static MapAccessionModifications m_mapAccessionModifications
int getNumberOfIsotope(Isotope isotope) const override final
get the number of isotopes C13, H2, O17, O18, N15, S33, S34, S36 in the molecule
Definition: aa.h:45
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
Definition: aa.cpp:166
pappso_double getMass() const override
Definition: aa.cpp:79
const OboPsiModTerm & getOne()
virtual const QString & qwhat() const
const char * what() const noexcept override
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:129
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
const pappso_double DIFFS32S33(32.9714589101 - MASSSULFUR)
const pappso_double DIFFS32S34(33.9678670300 - MASSSULFUR)
const pappso_double DIFFO16O17(16.99913150 - MASSOXYGEN)
const pappso_double MASSCARBON(12)
const pappso_double MASSSULFUR(31.9720711741)
std::shared_ptr< const Peptide > PeptideSp
const pappso_double DIFFS32S36(35.9670812000 - MASSSULFUR)
const AaModification * AaModificationP
AtomIsotopeSurvey
Definition: types.h:76
double pappso_double
A type definition for doubles.
Definition: types.h:48
Isotope
Definition: types.h:91
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSNITROGEN(14.0030740048)
const pappso_double MASSOXYGEN(15.99491461956)
const pappso_double DIFFO16O18(17.9991610 - MASSOXYGEN)
const pappso_double DIFFN14N15(15.0001088982 - MASSNITROGEN)
const pappso_double DIFFC12C13(1.0033548378)
const pappso_double DIFFH1H2(2.0141017778 - MPROTIUM)