/**
 * \file pappsomspp/processing/specpeptidoms/semiglobalalignment.h
 * \date 24/03/2025
 * \author Aurélien Berthier
 * \brief protein to spectrum alignment
 *
 * C++ implementation of the SpecPeptidOMS algorithm described in :
 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
 * https://doi.org/10.1021/acs.jproteome.4c00870.
 */

/*
 * Copyright (c) 2025 Aurélien Berthier
 * <aurelien.berthier@ls2n.fr>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include "spomsspectrum.h"
#include "../../protein/protein.h"
#include "scorevalues.h"
#include "locationsaver.h"
#include "scenario.h"
#include "peptidemodel.h"
#include "spomsprotein.h"

namespace pappso
{
namespace specpeptidoms
{

/**
 * @brief Plain record describing one cell of interest tracked during the alignment.
 *
 * Every member has a default initializer so that a default-constructed KeyCell never holds
 * indeterminate values. The struct stays an aggregate (C++14 and later), so brace
 * initialization in declaration order (n_row, score, beginning, tree_id) keeps working.
 */
struct KeyCell
{
  std::size_t n_row     = 0; // row index — presumably a row of the alignment matrix (TODO confirm)
  int score             = 0; // score attached to this cell
  std::size_t beginning = 0; // beginning index — presumably of the alignment ending in this cell
                             // (TODO confirm)
  int tree_id           = 0; // identifier of a tree; exact semantics not visible in this header
};

/**
 * @brief Result of aligning a spectrum on a protein sequence: score, peaks used, mass shifts and
 * location of the aligned peptide.
 */
struct Alignment
{
  /** @brief reinitialize this alignment to its default values
   */
  void reset();

  /** @brief convenience helper returning the peptide sequence designated by this alignment
   * @param protein_sequence protein sequence from which the peptide is taken, using the location
   * stored in this alignment
   */
  QString getPeptideString(const QString &protein_sequence) const;

  /** @brief convenience helper returning the remaining unexplained mass shift
   *
   * This is the mass delta, not explained by the alignment, between the peptide chemical formula
   * and the precursor of the observed experimental spectrum.
   */
  double getNonAlignedMass() const;

  /** @brief get the start position of the alignment on the protein sequence
   */
  std::size_t getPositionStart() const;

  /** List of the spectrum's peaks used by the alignment */
  std::vector<std::size_t> peaks;

  /** Peptide model representing the alignment, with mass shifts and sequence shifts */
  PeptideModel m_peptideModel;

  /** Final alignment score */
  int score = 0;

  /** Shift between the spectrum's first peak (at 19.02 Da) and the alignment's last peak (first
   * in N->C order) */
  double begin_shift = 0.0;

  /** Missing mass between the alignment's total mass (including begin_shift) and the spectrum
   * precursor mass */
  double end_shift = 0.0;

  /** List of mass shifts present in the alignment */
  std::vector<double> shifts;

  /** SPC : Shared Peak Count */
  std::size_t SPC = 0;

  /** Localization of the alignment's first amino acid in the peptide sequence */
  std::size_t beginning = 0;

  /** Localization of the alignment's last amino acid in the peptide sequence */
  std::size_t end = 0;
};

class SemiGlobalAlignment
{
  public:
  /**
   * Default constructor
   */
  SemiGlobalAlignment(const ScoreValues &score_values,
                      const pappso::PrecisionPtr precision_ptr,
                      const AaCode &aaCode);

  /**
   * Destructor
   */
  ~SemiGlobalAlignment();

  /**
   * @brief perform the first alignment search between a protein sequence and a spectrum. The member
   * location heap is filled with the candidates locations.
   * @param spectrum Spectrum to align
   * @param protein_ptr Protein pointer on the sequence to align.
   */
  void fastAlign(const SpOMSSpectrum &spectrum, const SpOMSProtein *protein_ptr);

  /**
   * @brief performs the second alignment search between a protein subsequence and a spectrum.
   * @param spectrum Spectrum to align
   * @param protein_ptr Protein pointer on the sequence to align.
   * @param beginning Index of the beginning of the subsequence to align.
   * @param length Length of the subsequence to align.
   */
  void preciseAlign(const SpOMSSpectrum &spectrum,
                    const SpOMSProtein *protein_ptr,
                    const std::size_t beginning,
                    const std::size_t length);

  /**
   * @brief performs the post-processing : generates corrected spectra and align them
   * @param spectrum Spectrum to align
   * @param protein_ptr Protein pointer on the sequence to align.
   * @param beginning Index of the beginning of the subsequence to align.
   * @param length Length of the subsequence to align.
   * @param shifts List of potential precursor mass errors to test.
   */
  void postProcessingAlign(const SpOMSSpectrum &spectrum,
                           const SpOMSProtein *protein_ptr,
                           std::size_t beginning,
                           std::size_t length,
                           const std::vector<double> &shifts);

  /**
   * @brief Returns a copy of m_location_saver.
   */
  LocationSaver getLocationSaver() const;

  /**
   * @brief Returns a copy of m_scenario.
   */
  Scenario getScenario() const;

  /**
   * @brief Returns a const ref to m_best_alignment.
   */
  const Alignment &getBestAlignment() const;


  /** @brief convenient function for degub purpose
   */
  const std::vector<KeyCell> &getInterestCells() const;


  /**
   * @brief Returns a list of the potential mass errors corresponding to the provided alignment in
   * the provided protein sequence.
   * @param aa_code the amino acid code of reference to get aminon acid masses
   * @param alignment Alignment for which to get the potential mass errors.
   * @param protein_seq Protein sequence corresponding to the provided alignment.
   */
  static std::vector<double> getPotentialMassErrors(const pappso::AaCode &aa_code,
                                                    const Alignment &alignment,
                                                    const QString &protein_seq);

  /** @brief check that the sequence has a minimum of amino acid checkSequenceDiversity
   * @param sequence protein sequence
   * @param window the size of substring to check
   * @param minimum_aa_diversity minimum number of different amino acid in this window
   */
  static bool checkSequenceDiversity(const QString &sequence,
                                     std::size_t window,
                                     std::size_t minimum_aa_diversity);


  private:
  /**
   * @brief Stores the best alignment from m_scenario in m_best_alignment
   * @param sequence reversed sequence of the current alignment.
   * @param spectrum Spectrum currently being aligned.
   * @param offset Size of the protein sequence minus beginning of the alignment. Used to compute
   * the position of the alignment in the protein sequence.
   */
  void saveBestAlignment(const SpOMSProtein &sequence,
                         const SpOMSSpectrum &spectrum,
                         std::size_t offset);

  /**
   * @brief Recursively performs the correction of the alignment.
   * @param protein_seq Protein reversed sequence to align.
   * @param protein_ptr Protein pointer on the sequence to align.
   * @param spectrum Spectrum to align.
   * @param peaks_to_remove Peaks to remove from the spectrum.
   * @param offset Size of the protein sequence minus beginning of the alignment. Used to compute
   * the position of the alignment in the protein sequence.
   */
  void correctAlign(const SpOMSProtein &protein_subseq,
                    const SpOMSProtein *protein_ptr,
                    const SpOMSSpectrum &spectrum,
                    std::vector<std::size_t> &peaks_to_remove,
                    std::size_t offset);

  /**
   * @brief updates the scores of the alignment matrix for a given amino acid as well as the
   * location heap/scenario.
   * @param sequence Reversed sequence of the protein being aligned
   * @param row_number number of the row to update (== index in sequence of the amino acid being
   * aligned)
   * @param aa_positions list of the AaPositions of the current amino acid
   * @param spectrum Spectrum being aligned
   * @param fast_align Whether to use the fast version of the algorithm (for 1st alignemnt step)
   * @param protein_ptr Protein pointer on the sequence to align.
   */
  void updateAlignmentMatrix(const pappso::specpeptidoms::SpOMSProtein &sequence,
                             const std::size_t row_number,
                             const std::vector<AaPosition> &aa_positions,
                             const SpOMSSpectrum &spectrum,
                             const bool fast_align,
                             const pappso::specpeptidoms::SpOMSProtein *protein_ptr);

  /**
   * @brief indicates if a perfect shift is possible between the provided positions
   * @param sequence Reversed sequence of the protein being aligned
   * @param spectrum Spectrum being aligned
   * @param origin_row beginning row of the aa gap to verify (== index of the first missing aa in
   * sequence)
   * @param current_row row being processed (== index of the current AaPosition in sequence)
   * @param l_peak left peak index of the mz gap to verify
   * @param r_peak right peak index of the mz gap to verify
   */
  bool perfectShiftPossible(const pappso::specpeptidoms::SpOMSProtein &sequence,
                            const SpOMSSpectrum &spectrum,
                            const std::size_t origin_row,
                            const std::size_t current_row,
                            const std::size_t l_peak,
                            const std::size_t r_peak) const;

  /**
   * @brief indicates if a perfect shift is possible from the spectrum beginning to the provided
   * peak.
   * @param sequence Reversed sequence of the protein being aligned
   * @param spectrum Spectrum being aligned
   * @param current_row row being processed (== index of the current AaPosition in sequence)
   * @param r_peak right peak index of the mz gap to verify
   */
  std::size_t perfectShiftPossibleFrom0(const pappso::specpeptidoms::SpOMSProtein &sequence,
                                        const SpOMSSpectrum &spectrum,
                                        const std::size_t current_row,
                                        const std::size_t r_peak) const;

  /**
   * @brief indicates if a perfect shift is possible between the provided positions
   * @param sequence Reversed sequence of the protein being aligned
   * @param spectrum Spectrum being aligned
   * @param end_row Index of the last aligned row.
   * @param end_peak Index of the last aligned peak.
   */
  std::size_t perfectShiftPossibleEnd(const pappso::specpeptidoms::SpOMSProtein &sequence,
                                      const SpOMSSpectrum &spectrum,
                                      std::size_t end_row,
                                      std::size_t end_peak) const;


  private:
  std::vector<KeyCell> m_interest_cells;
  std::vector<std::pair<std::size_t, KeyCell>> m_updated_cells;
  const ScoreValues &m_scorevalues;
  const int min_score = 15;
  pappso::PrecisionPtr m_precision_ptr;
  const AaCode &m_aaCode;
  LocationSaver m_location_saver;
  Scenario m_scenario;
  Alignment m_best_alignment;
  Alignment m_best_corrected_alignment;
  Alignment m_best_post_processed_alignment;
};
} // namespace specpeptidoms
} // namespace pappso
