/**
 * \file pappsomspp/amino_acid/chemicalformula.cpp
 * \date 06/04/2025
 * \author Olivier Langella
 * \brief structure to describe chemical formula
 */

/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include <QRegularExpression>
#include "pappsomspp/core/amino_acid/chemicalformula.h"
#include "../obo/obopsimodterm.h"
#include "pappsomspp/core/exception/exceptionnotfound.h"


/**
 * \brief textual representation of one isotope entry
 *
 * The isotope symbol is written as the bare element letter for C, H, N, O, P
 * and S, and as "(mass number)element" for the other isotopes handled here.
 * The symbol is followed by a single space and the count. An isotope value
 * that is not handled below produces an empty symbol.
 *
 * \return "<symbol> <count>"
 */
const QString
pappso::IsotopeCount::toString() const
{
  // resolve the symbol first, then format once
  const QString symbol = [this]() -> QString {
    switch(isotope)
      {
        case Enums::Isotope::C:
          return QStringLiteral("C");
        case Enums::Isotope::C13:
          return QStringLiteral("(13)C");
        case Enums::Isotope::H:
          return QStringLiteral("H");
        case Enums::Isotope::H2:
          return QStringLiteral("(2)H");
        case Enums::Isotope::N:
          return QStringLiteral("N");
        case Enums::Isotope::N15:
          return QStringLiteral("(15)N");
        case Enums::Isotope::O:
          return QStringLiteral("O");
        case Enums::Isotope::O17:
          return QStringLiteral("(17)O");
        case Enums::Isotope::O18:
          return QStringLiteral("(18)O");
        case Enums::Isotope::P:
          return QStringLiteral("P");
        case Enums::Isotope::S:
          return QStringLiteral("S");
        case Enums::Isotope::S33:
          return QStringLiteral("(33)S");
        case Enums::Isotope::S34:
          return QStringLiteral("(34)S");
        case Enums::Isotope::S36:
          return QStringLiteral("(36)S");
      }
    return QString();
  }();

  return QString("%1 %2").arg(symbol).arg(count);
}


/** \brief build an empty chemical formula (no isotope entry) */
pappso::ChemicalFormula::ChemicalFormula() = default;

/** \brief destructor: nothing to release beyond the members themselves */
pappso::ChemicalFormula::~ChemicalFormula() = default;

/**
 * \brief copy constructor
 * \param other formula whose isotope entries are duplicated
 */
pappso::ChemicalFormula::ChemicalFormula(const pappso::ChemicalFormula &other)
{
  // the isotope vector is the only state copied here
  m_isotopeVector = other.m_isotopeVector;
}


/**
 * \brief build a chemical formula from an object exposing atom and isotope
 * counts
 *
 * For carbon, hydrogen, oxygen, nitrogen and sulfur, the counts of the heavy
 * isotopes are read with getNumberOfIsotope() and subtracted from the total
 * returned by getNumberOfAtom(); the remainder is stored under the plain
 * element entry (Enums::Isotope::C, H, O, N, S). Phosphorus is stored from
 * getNumberOfAtom() alone. Only strictly positive counts are stored.
 *
 * \param atom_interface source of the atom and isotope counts
 */
pappso::ChemicalFormula::ChemicalFormula(const pappso::AtomNumberInterface &atom_interface)
{
  // store an entry only when its count is strictly positive
  auto keep_if_positive = [this](Enums::Isotope isotope, std::int16_t quantity) {
    if(quantity > 0)
      {
        m_isotopeVector.push_back({isotope, quantity});
      }
  };

  qDebug();

  // carbon
  std::int16_t heavy = atom_interface.getNumberOfIsotope(Enums::Isotope::C13);
  std::int16_t light = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::C) - heavy;
  qDebug() << light;
  keep_if_positive(Enums::Isotope::C, light);
  keep_if_positive(Enums::Isotope::C13, heavy);

  // hydrogen
  heavy = atom_interface.getNumberOfIsotope(Enums::Isotope::H2);
  light = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::H) - heavy;
  keep_if_positive(Enums::Isotope::H, light);
  keep_if_positive(Enums::Isotope::H2, heavy);

  qDebug();

  // oxygen: two heavy isotopes are subtracted from the total
  heavy                  = atom_interface.getNumberOfIsotope(Enums::Isotope::O17);
  std::int16_t heavy_o18 = atom_interface.getNumberOfIsotope(Enums::Isotope::O18);
  light = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::O) - heavy - heavy_o18;
  keep_if_positive(Enums::Isotope::O, light);
  keep_if_positive(Enums::Isotope::O17, heavy);
  keep_if_positive(Enums::Isotope::O18, heavy_o18);

  qDebug();

  // nitrogen
  heavy = atom_interface.getNumberOfIsotope(Enums::Isotope::N15);
  light = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::N) - heavy;
  keep_if_positive(Enums::Isotope::N, light);
  keep_if_positive(Enums::Isotope::N15, heavy);

  qDebug();

  // sulfur: heavy isotopes are stored first, the remainder (plain S) last
  light = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::S);

  heavy = atom_interface.getNumberOfIsotope(Enums::Isotope::S33);
  keep_if_positive(Enums::Isotope::S33, heavy);
  light = light - heavy;
  qDebug();

  heavy = atom_interface.getNumberOfIsotope(Enums::Isotope::S34);
  keep_if_positive(Enums::Isotope::S34, heavy);
  light = light - heavy;
  qDebug();

  heavy = atom_interface.getNumberOfIsotope(Enums::Isotope::S36);
  keep_if_positive(Enums::Isotope::S36, heavy);
  light = light - heavy;

  keep_if_positive(Enums::Isotope::S, light);
  qDebug();

  // phosphorus: no heavy isotope is looked up
  light = atom_interface.getNumberOfAtom(Enums::AtomIsotopeSurvey::P);
  keep_if_positive(Enums::Isotope::P, light);

  qDebug() << m_isotopeVector.size();
}

/**
 * \brief copy assignment
 * \param other formula whose isotope entries replace the current ones
 * \return reference to this formula
 */
pappso::ChemicalFormula &
pappso::ChemicalFormula::operator=(const pappso::ChemicalFormula &other)
{
  if(this != &other)
    {
      m_isotopeVector = other.m_isotopeVector;
    }
  return *this;
}

/**
 * \brief unary minus
 * \return a new formula holding the same isotope entries, in the same order,
 * each with its count multiplied by -1
 */
pappso::ChemicalFormula
pappso::ChemicalFormula::operator-() const
{
  ChemicalFormula negated;

  for(auto it = m_isotopeVector.begin(); it != m_isotopeVector.end(); ++it)
    {
      const std::int16_t opposite = it->count * -1;
      negated.m_isotopeVector.push_back({it->isotope, opposite});
    }

  return negated;
}

/**
 * \brief sum of two chemical formulas
 *
 * The entries of \p to_add are appended to a copy of this formula, then
 * simplify() is called on the result.
 *
 * \param to_add formula to add
 * \return the new, simplified formula
 */
pappso::ChemicalFormula
pappso::ChemicalFormula::operator+(const pappso::ChemicalFormula &to_add) const
{
  ChemicalFormula sum(*this);

  auto &sum_entries = sum.m_isotopeVector;
  for(const IsotopeCount &entry : to_add.m_isotopeVector)
    {
      sum_entries.push_back(entry);
    }

  sum.simplify();
  return sum;
}


/**
 * \brief total number of atoms of one chemical element
 *
 * Sums the counts of every stored isotope entry belonging to the requested
 * element (for instance C and C13 for carbon). Phosphorus is counted from the
 * Enums::Isotope::P entries; it was previously always reported as 0 although
 * the formula can store phosphorus.
 *
 * \param atom chemical element to count
 * \return the summed count (may be negative for a difference formula), 0 when
 * the element is absent or not handled
 */
int
pappso::ChemicalFormula::getNumberOfAtom(Enums::AtomIsotopeSurvey atom) const
{
  int count = 0;
  for(const IsotopeCount &isotope_count : m_isotopeVector)
    {
      // decide, from the stored isotope, whether it belongs to the requested element
      bool same_element = false;
      switch(isotope_count.isotope)
        {
          case Enums::Isotope::C:
          case Enums::Isotope::C13:
            same_element = (atom == Enums::AtomIsotopeSurvey::C);
            break;
          case Enums::Isotope::H:
          case Enums::Isotope::H2:
            same_element = (atom == Enums::AtomIsotopeSurvey::H);
            break;
          case Enums::Isotope::O:
          case Enums::Isotope::O17:
          case Enums::Isotope::O18:
            same_element = (atom == Enums::AtomIsotopeSurvey::O);
            break;
          case Enums::Isotope::N:
          case Enums::Isotope::N15:
            same_element = (atom == Enums::AtomIsotopeSurvey::N);
            break;
          case Enums::Isotope::S:
          case Enums::Isotope::S33:
          case Enums::Isotope::S34:
          case Enums::Isotope::S36:
            same_element = (atom == Enums::AtomIsotopeSurvey::S);
            break;
          case Enums::Isotope::P:
            same_element = (atom == Enums::AtomIsotopeSurvey::P);
            break;
          default:
            break;
        }

      if(same_element)
        {
          count += isotope_count.count;
        }
    }
  return count;
}

/**
 * \brief number of atoms stored for one specific isotope
 * \param isotope isotope to look for
 * \return sum of the counts of every entry holding exactly this isotope
 */
int
pappso::ChemicalFormula::getNumberOfIsotope(Enums::Isotope isotope) const
{
  int total = 0;
  for(auto it = m_isotopeVector.begin(); it != m_isotopeVector.end(); ++it)
    {
      if(it->isotope != isotope)
        {
          continue;
        }
      total += it->count;
    }
  return total;
}

double
pappso::ChemicalFormula::getMass() const
{

  double mass = 0;
  for(auto &isotope_count : m_isotopeVector)
    {
      switch(isotope_count.isotope)
        {
          case Enums::Isotope::C13:
            mass += ((DIFFC12C13 + MASSCARBON) * (double)isotope_count.count);
            break;
          case Enums::Isotope::C:
            mass += (MASSCARBON * (double)isotope_count.count);
            break;
          case Enums::Isotope::H:
            mass += (MPROTIUM * (double)isotope_count.count);
            break;
          case Enums::Isotope::H2:
            mass += ((DIFFH1H2 + MPROTIUM) * (double)isotope_count.count);
            break;
          case Enums::Isotope::N:
            mass += (MASSNITROGEN * (double)isotope_count.count);
            break;
          case Enums::Isotope::N15:
            mass += ((MASSNITROGEN + DIFFN14N15) * (double)isotope_count.count);
            break;
          case Enums::Isotope::O:
            mass += (MASSOXYGEN * (double)isotope_count.count);
            break;
          case Enums::Isotope::O17:
            mass += ((DIFFO16O17 + MASSOXYGEN) * (double)isotope_count.count);
            break;
          case Enums::Isotope::O18:
            mass += ((DIFFO16O18 + MASSOXYGEN) * (double)isotope_count.count);
            break;
          case Enums::Isotope::S:
            mass += (MASSSULFUR * (double)isotope_count.count);
            break;
          case Enums::Isotope::S33:
            mass += ((DIFFS32S33 + MASSSULFUR) * (double)isotope_count.count);
            break;
          case Enums::Isotope::S34:
            mass += ((DIFFS32S34 + MASSSULFUR) * (double)isotope_count.count);
            break;
          case Enums::Isotope::S36:
            mass += ((DIFFS32S36 + MASSSULFUR) * (double)isotope_count.count);
            break;
          case Enums::Isotope::P:
            mass += (MASSPHOSPHORUS * (double)isotope_count.count);
            break;
        }
    }
  return mass;
}

/**
 * \brief replace the formula content with the one described by an OBO term
 *
 * The current entries are cleared first. A Unimod term is parsed from its
 * m_diffFormula with setUnimodDiffFormula(). Any other term is parsed with
 * setPsimodDiffFormula(): from m_formula when the term "is a" MOD:01441, from
 * m_diffFormula otherwise.
 *
 * \param term OBO term to read
 * \throws pappso::ExceptionNotFound if the term is a MOD:01441 and its
 * m_origin is empty
 */
void
pappso::ChemicalFormula::setOboPsiModTerm(const pappso::OboPsiModTerm &term)
{
  m_isotopeVector.clear();

  if(!term.isUnimod())
    {
      qDebug() << "is_a " << term.m_isA.join(" ");

      const bool is_mod_01441 = term.isA("MOD:01441");
      if(!is_mod_01441)
        {
          setPsimodDiffFormula(term.m_diffFormula);
        }
      else
        {
          qDebug() << "term.isA(MOD:01441)";
          // such a term is only accepted when it declares an origin
          if(term.m_origin.isEmpty())
            {
              throw pappso::ExceptionNotFound(
                QObject::tr("origin not found for term : [%1]").arg(term.getAccession()));
            }
          setPsimodDiffFormula(term.m_formula);
          qDebug();
          // new_mod->m_mass = AaBase::getAaMass(term.m_origin[0].toLatin1());
        }
    }
  else
    {
      qDebug();
      setUnimodDiffFormula(term.m_diffFormula);
    }

  qDebug();
}

void
pappso::ChemicalFormula::setUnimodDiffFormula(const QString &diff_formula)
{
  // C(-6) 13C(6) N(-2) 15N(2)

  // atom alone
  QRegularExpression rx("(^|\\s)([C,H,O,N,S,P])($|\\s)");
  QRegularExpressionMatchIterator i = rx.globalMatch(diff_formula);

  while(i.hasNext())
    {
      QRegularExpressionMatch match = i.next();

      qDebug() << match.captured(2) << " " << match.captured(2);
      std::int8_t count = 1;

      qDebug() << count;
      if(match.captured(2) == "C")
        {
          m_isotopeVector.push_back({Enums::Isotope::C, count});
        }
      else if(match.captured(2) == "H")
        {
          m_isotopeVector.push_back({Enums::Isotope::H, count});
        }
      else if(match.captured(2) == "N")
        {
          m_isotopeVector.push_back({Enums::Isotope::N, count});
        }
      else if(match.captured(2) == "O")
        {
          m_isotopeVector.push_back({Enums::Isotope::O, count});
        }
      else if(match.captured(2) == "S")
        {
          m_isotopeVector.push_back({Enums::Isotope::S, count});
        }
      else if(match.captured(2) == "P")
        {
          m_isotopeVector.push_back({Enums::Isotope::P, count});
        }
    }

  // C(-6) 13C(6) N(-2) 15N(2)
  rx.setPattern("(^|\\s)([C,H,O,N,S,P])\\(([-]{0,1}\\d+)\\)");
  i = rx.globalMatch(diff_formula);

  while(i.hasNext())
    {
      QRegularExpressionMatch match = i.next();

      qDebug() << match.captured(2) << " " << match.captured(3);
      std::int8_t count = match.captured(3).toInt();

      qDebug() << count;
      if(match.captured(2) == "C")
        {
          m_isotopeVector.push_back({Enums::Isotope::C, count});
        }
      else if(match.captured(2) == "H")
        {
          m_isotopeVector.push_back({Enums::Isotope::H, count});
        }
      else if(match.captured(2) == "N")
        {
          m_isotopeVector.push_back({Enums::Isotope::N, count});
        }
      else if(match.captured(2) == "O")
        {
          m_isotopeVector.push_back({Enums::Isotope::O, count});
        }
      else if(match.captured(2) == "S")
        {
          m_isotopeVector.push_back({Enums::Isotope::S, count});
        }
      else if(match.captured(2) == "P")
        {
          m_isotopeVector.push_back({Enums::Isotope::P, count});
        }
    }

  // C(-6) 13C(6) N(-2) 15N(2)
  // look for isotopes :
  rx.setPattern("(^|\\s)(\\d+)([C,H,O,N,S])($|\\s)");

  i = rx.globalMatch(diff_formula);

  while(i.hasNext())
    {
      QRegularExpressionMatch match = i.next();

      qDebug() << match.captured(2) << " " << match.captured(3);

      addIsotopeNumberCount(match.captured(3), match.captured(2), 1);
    }
  // C(-6) 13C(6) N(-2) 15N(2)
  // look for isotopes :
  rx.setPattern("(^|\\s)(\\d+)([C,H,O,N,S])\\(([-]{0,1}\\d+)\\)");

  i = rx.globalMatch(diff_formula);

  while(i.hasNext())
    {
      QRegularExpressionMatch match = i.next();

      qDebug() << match.captured(2) << " " << match.captured(3) << " " << match.captured(4);

      std::int8_t number_of_isotopes = match.captured(4).toInt();

      addIsotopeNumberCount(match.captured(3), match.captured(2), number_of_isotopes);
    }
}

void
pappso::ChemicalFormula::addIsotopeNumberCount(const QString &atom_str,
                                               const QString &atom_isotope_number,
                                               std::int8_t count)
{

  if(atom_str == "C")
    {
      if(atom_isotope_number == "12")
        {
          m_isotopeVector.push_back({Enums::Isotope::C, count});
        }
      else if(atom_isotope_number == "13")
        {
          m_isotopeVector.push_back({Enums::Isotope::C13, count});
        }
    }
  else if(atom_str == "H")
    {
      if(atom_isotope_number == "1")
        {
          m_isotopeVector.push_back({Enums::Isotope::H, count});
        }
      else if(atom_isotope_number == "2")
        {
          m_isotopeVector.push_back({Enums::Isotope::H2, count});
        }
    }
  else if(atom_str == "N")
    {

      if(atom_isotope_number == "14")
        {
          m_isotopeVector.push_back({Enums::Isotope::N, count});
        }
      else if(atom_isotope_number == "15")
        {
          m_isotopeVector.push_back({Enums::Isotope::N15, count});
        }
    }
  else if(atom_str == "O")
    {
      if(atom_isotope_number == "16")
        {
          m_isotopeVector.push_back({Enums::Isotope::O, count});
        }
      else if(atom_isotope_number == "17")
        {
          m_isotopeVector.push_back({Enums::Isotope::O17, count});
        }
      else if(atom_isotope_number == "18")
        {
          m_isotopeVector.push_back({Enums::Isotope::O18, count});
        }
    }
  else if(atom_str == "S")
    {
      if(atom_isotope_number == "32")
        {
          m_isotopeVector.push_back({Enums::Isotope::S, count});
        }
      else if(atom_isotope_number == "33")
        {
          m_isotopeVector.push_back({Enums::Isotope::S33, count});
        }
      else if(atom_isotope_number == "34")
        {
          m_isotopeVector.push_back({Enums::Isotope::S34, count});
        }
      else if(atom_isotope_number == "36")
        {
          m_isotopeVector.push_back({Enums::Isotope::S36, count});
        }
    }
  else
    {
      // not known

      throw pappso::ExceptionNotFound(QObject::tr("atom string: [%1] with atom number %2 not found")
                                        .arg(atom_str)
                                        .arg(atom_isotope_number));
    }
}


void
pappso::ChemicalFormula::setPsimodDiffFormula(const QString &diff_formula)
{
  QRegularExpression rx("(^|\\s)([C,H,O,N,S,P])\\s([-]{0,1}\\d+)");
  QRegularExpressionMatchIterator i = rx.globalMatch(diff_formula);

  while(i.hasNext())
    {
      QRegularExpressionMatch match = i.next();

      qDebug() << match.captured(2) << " " << match.captured(2) << " " << match.captured(3);
      std::int8_t count = match.captured(3).toInt();

      qDebug() << count;
      if(match.captured(2) == "C")
        {
          m_isotopeVector.push_back({Enums::Isotope::C, count});
        }
      else if(match.captured(2) == "H")
        {
          m_isotopeVector.push_back({Enums::Isotope::H, count});
        }
      else if(match.captured(2) == "N")
        {
          m_isotopeVector.push_back({Enums::Isotope::N, count});
        }
      else if(match.captured(2) == "O")
        {
          m_isotopeVector.push_back({Enums::Isotope::O, count});
        }
      else if(match.captured(2) == "S")
        {
          m_isotopeVector.push_back({Enums::Isotope::S, count});
        }
      else if(match.captured(2) == "P")
        {
          m_isotopeVector.push_back({Enums::Isotope::P, count});
        }
    }

  // look for isotopes :
  rx.setPattern("\\((\\d+)\\)([C,H,O,N,S])\\s([-]{0,1}\\d+)");

  i = rx.globalMatch(diff_formula);

  while(i.hasNext())
    {
      QRegularExpressionMatch match = i.next();

      qDebug() << match.captured(1) << " " << match.captured(2) << " " << match.captured(3);

      std::int8_t number_of_isotopes = match.captured(3).toInt();


      addIsotopeNumberCount(match.captured(2), match.captured(1), number_of_isotopes);
    }
}

/**
 * \brief add a number of atoms of one isotope to the formula
 *
 * The count is added to the first existing entry holding the same isotope; a
 * new entry is appended when there is none. The search stops at the first
 * match: the vector may contain several entries for one isotope (the parsing
 * functions append without merging), and adding the count to each of them
 * would count it several times.
 *
 * \param isotope_count isotope and number of atoms to add
 */
void
pappso::ChemicalFormula::addIsotopeCount(const pappso::IsotopeCount &isotope_count)
{
  for(IsotopeCount &iso_count : m_isotopeVector)
    {
      if(iso_count.isotope == isotope_count.isotope)
        {
          iso_count.count += isotope_count.count;
          return;
        }
    }

  // isotope not present yet
  m_isotopeVector.push_back(isotope_count);
}


/**
 * \brief textual representation of the formula
 *
 * Entries with a count of 0 are skipped. The remaining entries are converted
 * with IsotopeCount::toString(), sorted as strings and joined with a single
 * space.
 *
 * \return the formula as text
 */
const QString
pappso::ChemicalFormula::toString() const
{
  QStringList parts;

  for(auto it = m_isotopeVector.begin(); it != m_isotopeVector.end(); ++it)
    {
      if(it->count == 0)
        {
          continue;
        }
      parts.append(it->toString());
    }

  parts.sort();
  return parts.join(" ");
}

/**
 * \brief move every atom of an element to one of its heavy isotopes
 *
 * Handled for C13, N15 and H2 only; any other value leaves the formula
 * untouched. The counts of the plain element entries (C, N or H) are set to 0
 * and their sum, plus the counts already stored for the heavy isotope, is
 * written to the last heavy isotope entry found. When no such entry exists a
 * new one is appended.
 *
 * \param isotope target heavy isotope
 */
void
pappso::ChemicalFormula::setFullIsotope(Enums::Isotope isotope)
{
  // plain element entry matching the requested heavy isotope
  Enums::Isotope light_isotope;
  switch(isotope)
    {
      case Enums::Isotope::C13:
        light_isotope = Enums::Isotope::C;
        break;
      case Enums::Isotope::N15:
        light_isotope = Enums::Isotope::N;
        break;
      case Enums::Isotope::H2:
        light_isotope = Enums::Isotope::H;
        break;
      default:
        // nothing to do for the other isotopes
        return;
    }

  IsotopeCount *heavy_entry_p = nullptr;
  std::int16_t total          = 0;

  for(IsotopeCount &entry : m_isotopeVector)
    {
      if(entry.isotope == light_isotope)
        {
          total += entry.count;
          entry.count = 0;
        }
      else if(entry.isotope == isotope)
        {
          total += entry.count;
          heavy_entry_p = &entry;
        }
    }

  if(heavy_entry_p != nullptr)
    {
      heavy_entry_p->count = total;
      return;
    }

  // no entry for the heavy isotope yet
  m_isotopeVector.push_back({isotope, total});
}


void
pappso::ChemicalFormula::simplify()
{
  std::vector<IsotopeCount> new_vector;
  std::sort(m_isotopeVector.begin(), m_isotopeVector.end(), [](IsotopeCount &a, IsotopeCount &b) {
    return a.isotope < b.isotope;
  });

  auto it = m_isotopeVector.begin();
  while(it != m_isotopeVector.end())
    {
      IsotopeCount current_element = *it;
      it++;
      if(it != m_isotopeVector.end())
        {
          if(it->isotope != current_element.isotope)
            {
              new_vector.push_back(current_element);
            }
          else
            {
              while((it != m_isotopeVector.end()) && (it->isotope == current_element.isotope))
                {
                  current_element.count += it->count;
                  it++;
                }
              if(current_element.count != 0)
                {
                  new_vector.push_back(current_element);
                }
            }
        }
    }
  m_isotopeVector = new_vector;
}
