// $Id: SequenceInfo.h 1369 2024-11-29 14:20:00Z ge $
/// \file SequenceInfo.h
/// \brief This class manages experimental sequence information
///
/// $Revision: 1369 $
/// \author Gerald Weber <gweberbh@gmail.com>
#ifndef GBC_SEQUENCEINFO_H
#define GBC_SEQUENCEINFO_H "$Id: SequenceInfo.h 1369 2024-11-29 14:20:00Z ge $"

#include "NucleotideSequence.h"
#include "NeighbourSequence.h"
#include "Duplex.h"
#include "StrandPairSequence.h"
#include "ReferenceSet.h"
#include "ErrorCodes.h"
#include "ParameterMap.h"
#include "RegexAux.h"

using namespace gbc;
namespace gbc
{
/// \brief Holds the measured, adjusted and predicted value of one named quantity.
///
/// \tparam _Tp numeric type of the three stored values.
template<class _Tp=double>
class Data
  {
  public:
    typedef _Tp value_type;  ///< Numeric type of the stored values

    std::string name;        ///< Label used as column prefix by print_name()
    value_type measured;
    value_type adjusted;
    value_type predicted;

  /// Creates an unnamed entry with all three values value-initialised.
  Data(void): name(), measured(), adjusted(), predicted() {}

  /// Creates an entry called \a nm with all three values value-initialised.
  Data(const std::string& nm): name(nm), measured(), adjusted(), predicted() {}

  /// Writes the three column headers "<name>.measured <name>.adjusted <name>.predicted "
  /// (note the trailing blank) to \a out.
  inline void print_name(std::ostream &out) const
    {
    out << name << ".measured " 
        << name << ".adjusted "
        << name << ".predicted "; 
    }

  /// Streams "measured adjusted predicted", separated by single blanks.
  /// The entry is taken by const reference so that const objects and
  /// temporaries can be printed as well.
  inline friend std::ostream& operator<<(std::ostream &out, const Data& dt)
    {
    out << dt.measured << " " << dt.adjusted << " " << dt.predicted;
    return out;
    }
  };
  
template<class _SequenceInfo> class SequenceDataset; //forward declaration

template<bool _Periodic=false, class _Tp=double, class _sequence_type=std::pair<std::string,std::string> >
class SequenceInfo
  {
  public:
    // Type aliases used throughout the class
    typedef SequenceInfo<_Periodic,_Tp,_sequence_type> sequenceinfo_type;
    typedef _Tp                 value_type;
    typedef Data<value_type>    data_type;
    typedef _sequence_type      sequence_type;
    typedef Duplex<>            duplex_type;
    typedef BasePair<>          base_pair_type;
    typedef BasePairNeighbours<> nneighbour_type;
    typedef std::deque<duplex_type> duplex_deque_type;
    typedef NeighbourSequence<> neighboursequence_type;
    typedef StrandPairSequence<> strand_pair_sequence_type;
    // Exact rules: looked up with a trimer, the mapped string is used to build the replacement base pair
    typedef std::map<std::string,std::string>    equivalence_map_type;
    // Regular expression -> list of the trimers it matched
    typedef std::map<std::string,std::list<std::string> >    matched_equivalence_map_type;
    // Salt concentrations keyed by a string such as "Na+"
    typedef std::map<std::string,value_type>     salt_map_type;
    typedef ReferenceSet<Duplex<> >                    duplex_ref_type;
    typedef ReferenceSet<NeighbourSequence<> >         neighbours_ref_type;
    typedef ReferenceSet<StrandPairSequence<> >        strandpair_ref_type;
    // Trimer -> number of occurrences
    typedef std::map<duplex_type,int>             trimers_map_type;
    typedef std::valarray<value_type>                  vector_type;
    typedef std::set<std::string>                 string_set_type;
    typedef ParameterMap<value_type>              parameter_map_type;

    // Available salt correction schemes; nocorrection is the value set by the default constructor.
    // NOTE(review): the other names look like literature references (author/year/equation) - confirm.
    enum salt_correction_scheme_type {nocorrection,schildkraut65,santalucia98,owczarzy04eq19,owczarzy04eq20,owczarzy04eq21,owczarzy04eq22,chen13eq19,chen13eq20,chen13eq21,chen13eq22,nakano99,tm_ln,rec_tm_ln};
    typedef std::map<salt_correction_scheme_type,value_type>  salt_correction_map_type;
    
    sequence_type sequence;           ///< Main (first) and complementary (second) strand, printed as first/second
    std::string   identification;     ///< string holding some type of optional identification
    std::string   from_file;          ///< file where this sequence was read
    constexpr static const value_type Concentration_factor=1e-6; ///< Converts species concentrations given in \mu M (1e-6 M) to M
    constexpr static const value_type R=1.987;   ///< Gas constant, cal/(K mol)
    value_type Ref_concentration;     ///< Reference species concentration
    value_type alpha;                 ///< 1 if the sequence is self-complementary, 4 otherwise, see determine_alpha()
    value_type species_concentration; ///< Oligomer species concentration in \mu M
    salt_map_type salt_concentration; ///< Salt concentration in mM
    value_type Target_salt_ct;        ///< in mM
    data_type  temperature;           ///< Sequence temperature to species concentration
    data_type  enthalpy;              ///< Sequence Enthalpy as given in papers
    data_type  entropy;               ///< Sequence Entropy as given in papers
    data_type  gibbs_free_energy;     ///< DeltaG
    data_type  absorption_width;      ///< a1 in K
    int length;                       ///< Sequence length, set together with BP_number in insert_sequence()
    int prediction_method;            ///< Flag for selecting prediction methods
    int group_key;                    ///< A numeric key for grouping used together with prediction_group
    value_type thermal_equivalence;   ///< Thermal equivalence
    value_type thermal_index;         ///< Thermal index sqrt(thermal_equivalence)
    value_type partition_function;    ///< Zy
    value_type helmholtz_energy;      ///< Fy
    vector_type average_y;            ///< vector containing <y_i>
    bool Periodic;                    ///< Consider sequence as periodic, this will generate an additional NN pair connecting the first and the last bp
    std::string PrintFlags;           ///< Flags that control what is going to be printed
    
    /// Sequence information
    duplex_type            Exact_duplex;        ///< Contains the sequence in Duplex format, it is exact because we are not applying symmetry reductions
    duplex_type            BP_reduced_duplex;      ///< Contains the base-pair symmetry reduced sequence in Duplex format
    duplex_type            Symm_reduced_duplex;    ///< Contains the full-sequence symmetry reduced sequence in Duplex format
    neighboursequence_type Exact_neighbours;    ///< Contains the non-reduced sequence in Nearest neighbour format
    neighboursequence_type Reduced_neighbours;  ///< Contains the NN symmetry reduced sequence in Nearest neighbour format
    strand_pair_sequence_type Exact_strandpairs; ///< Contains the non-symmetry reduced sequence for strand pairs
    strand_pair_sequence_type Reduced_strandpairs;///< Contains the symmetry reduced sequence for strand pairs
    neighboursequence_type NPExact_neighbours;    ///< non-periodic
    neighboursequence_type NPReduced_neighbours;  ///< non-periodic
    strand_pair_sequence_type NPExact_strandpairs; ///< non-periodic
    strand_pair_sequence_type NPReduced_strandpairs;///< non-periodic
    equivalence_map_type*  pEquivalenceMap;     ///< Pointer to Dataset equivalence map (exact trimer rules)
    equivalence_map_type*  pReMap;     ///< Pointer to the regular-expression rule map, tried in apply_equivalence_map() when no exact rule matches
    matched_equivalence_map_type*  pMatchedEquivalenceMap; ///< Holds matched equivalences
    duplex_deque_type      Trimers_list;         ///< Trimers generated by apply_equivalence_map(), in sequence order
    trimers_map_type       Trimers_map;          ///< Number of occurrences of each trimer
    duplex_ref_type        Basepair_set;         ///< The set of different base pairs
    neighbours_ref_type    Reduced_neighbours_set;       ///< The set of different base pair neighbours
    int                    Reduced_neighbours_set_size;  ///< Copy of Reduced_neighbours_set.size() taken in insert_sequence()
    neighbours_ref_type    Exact_neighbours_set; ///< The set of different base pair neighbours
    strandpair_ref_type    StrandPair_set;       ///< The set of different strand pair neighbours
    neighbours_ref_type    NPNeighbours_set;     ///< The set of different base pair neighbours, non-periodic
    strandpair_ref_type    NPStrandPair_set;     ///< The set of different strand pair neighbours, non-periodic
    /// Sequence analysis
    bool                Has_CG;                 ///< True if sequence has at least one CG
    bool                Is_AT_only;             ///< True if sequence is AT only (may also be AU) 
    bool                Self_complementary;     ///< True if sequence is self-complementary
    base_pair_type      Terminal5_bp;           ///< Stores the 5' base pair
    base_pair_type      Terminal3_bp;           ///< Stores the 3' base pair
    base_pair_type      Terminal5_bp_reduced;   ///< Stores the symmetry reduced 5' base pair
    base_pair_type      Terminal3_bp_reduced;   ///< Stores the symmetry reduced 3' base pair
    nneighbour_type     Terminal5_NN;           ///< Stores the 5' NN
    nneighbour_type     Terminal3_NN;           ///< Stores the 3' NN
    nneighbour_type     Terminal5_NN_reduced;   ///< Stores the symmetry reduced 5' NN
    nneighbour_type     Terminal3_NN_reduced;   ///< Stores the symmetry reduced 3' NN
    nneighbour_type     Periodicity_NN;         ///< Stores the NN which results from periodic boundary conditions (Periodic=true)
    nneighbour_type     ExactPeriodicity_NN;    ///< Stores the exact NN which results from periodic boundary conditions (Periodic=true)
    bool                NN_periodicity_only;    ///< If true Periodicity_NN does not exist in sequence, only due to periodic boundary conditions
    int                 Terminal_5TA3;          ///< Number of 5'TA3' terminals
    int                 Terminal_AU;             ///< Number of AU or UA terminals
    int                 Terminal_AT;             ///< Number of AT or TA terminals
    int                 Terminal_CG;             ///< Number of CG or GC terminals
    int                 Terminal_dTrA;           ///< Number of dTrA or rAdT terminals
    int                 Terminal_dArU;           ///< Number of dArU or rUdA terminals
    int                 Terminal_dCrG;           ///< Number of dCrG or rGdC terminals
    int                 Terminal_dGrC;           ///< Number of dGrC or rCdG terminals
    int                 BP_number;               ///< Number of base pairs in sequence, may include pseudo-base pairs
    int                 CG_content;              ///< Number of CG base pairs, may include hybrid dCrG or dGrC
    value_type          fCG;                     ///< Fractional CG content, may include hybrid dCrG or dGrC
    string_set_type     CG_equivalent_set;       ///< String list of BP that should be considered equivalent to CG, this list should be symmetry reduced
    string_set_type     AT_equivalent_set;       ///< String list of BP that should be considered equivalent to AT, this list should be symmetry reduced
    salt_correction_map_type    Corrected_Tm_map;
    salt_correction_scheme_type Salt_correction_scheme;
    bool                Salt_corrected;          ///< true if salt correction was applied
    bool                Prediction_with_salt_correction; ///< if true applies salt correction on predicted Tm, default is to apply on input Tm
    int*                pReduced_nn_number;     ///< A pointer to the contents of Reduced_neighbours_set, released with delete[] in clear_pointers_for_different_sequence()
    value_type**        ppEnthalpy;             ///< Pointer-pointer, released with delete[] in clear_pointers_for_different_sequence()
    value_type**        ppEntropy;              ///< Pointer-pointer, released with delete[] in clear_pointers_for_different_sequence()
    parameter_map_type* pparameter_map;         ///< Pointer to a parameter map
    
    /// \brief Default constructor.
    ///
    /// Value-initialises every scalar member, names the thermodynamic Data entries and
    /// fills the default CG/AT equivalence sets. Periodic is taken from the template
    /// argument _Periodic and all pointers start out null.
    ///
    /// length, Ref_concentration, Terminal_AU, Terminal_AT and Terminal_CG are
    /// initialised here as well: print() and adjust_temperature() read them, which
    /// would be undefined behaviour on an object that has not yet seen insert_sequence().
    /// The initialisers are listed in declaration order, which is the order in which
    /// the members are actually initialised.
    SequenceInfo(void): sequence(), identification(), from_file(),
                        Ref_concentration(), alpha(), species_concentration(),
                        Target_salt_ct(),
                        temperature("temperature"), enthalpy("enthalpy"), entropy("entropy"), gibbs_free_energy("gibbs_free_energy"), absorption_width("absorption_width"),
                        length(), prediction_method(), group_key(), thermal_equivalence(), thermal_index(), partition_function(), helmholtz_energy(),
                        Periodic(_Periodic),
                        PrintFlags(),
                        pEquivalenceMap(), pReMap(), pMatchedEquivalenceMap(),
                        Reduced_neighbours_set_size(),
                        Exact_neighbours_set(false),//never simplify symmetry
                        Has_CG(false), Is_AT_only(false),
                        Self_complementary(false), Terminal5_bp(), Terminal3_bp(),
                        Terminal5_bp_reduced(), Terminal3_bp_reduced(),
                        Terminal5_NN(), Terminal3_NN(), Terminal5_NN_reduced(), Terminal3_NN_reduced(),
                        Periodicity_NN(), ExactPeriodicity_NN(), NN_periodicity_only(false),
                        Terminal_5TA3(), Terminal_AU(), Terminal_AT(), Terminal_CG(),
                        Terminal_dTrA(), Terminal_dArU(), Terminal_dCrG(), Terminal_dGrC(),
                        BP_number(), CG_content(), fCG(),
                        CG_equivalent_set(), AT_equivalent_set(),
                        Salt_correction_scheme(nocorrection), Salt_corrected(false), Prediction_with_salt_correction(false),
                        pReduced_nn_number(), ppEnthalpy(), ppEntropy(), pparameter_map() 
      {
      // Base pairs counted as CG by insert_sequence()
      CG_equivalent_set.insert("CG");
      CG_equivalent_set.insert("dCrG"); //DNA-RNA
      CG_equivalent_set.insert("dGrC"); //DNA-RNA
      // Base pairs that keep Is_AT_only true in insert_sequence()
      AT_equivalent_set.insert("AT"); 
      AT_equivalent_set.insert("AU"); 
      }
                        

    //Everything that needs to be cleared if we insert a different sequence                    
    inline void clear_pointers_for_different_sequence(void)
      {
      // Release the three arrays and reset each pointer right away.
      // delete[] on a null pointer does nothing, so no explicit test is required.
      delete[] pReduced_nn_number;
      pReduced_nn_number=nullptr;

      delete[] ppEnthalpy;
      ppEnthalpy=nullptr;

      delete[] ppEntropy;
      ppEntropy=nullptr;
      }
      
      
    /// \brief Writes the column header line matching the output of print().
    ///
    /// The gibbs_free_energy and absorption_width columns only appear when
    /// PrintFlags matches the corresponding name.
    inline void print_head(std::ostream& out)
      {
      // Fixed leading columns
      out << "Main/Complementary alpha salt_concentration species_concentration ";

      // Three columns (measured/adjusted/predicted) per thermodynamic quantity
      temperature.print_name(out);
      enthalpy.print_name(out);
      entropy.print_name(out);

      // Optional quantities selected through PrintFlags
      if (reg_match(PrintFlags,"gibbs_free_energy"))
        {
        gibbs_free_energy.print_name(out);
        }
      if (reg_match(PrintFlags,"absorption_width"))
        {
        absorption_width.print_name(out);
        }

      // Fixed trailing columns
      out << "prediction_method thermal_equivalence thermal_index partition_function helmholtz_energy ";
      out << std::endl;
      }

    /// \brief Prints this entry as one data line, optionally followed by a detailed analysis.
    ///
    /// \param out      stream that receives the output
    /// \param analysis if true the line is prefixed with "Dataset entry: " and followed
    ///                 by the general, terminal and decomposition information
    ///
    /// \attention This method is not const: salt_concentration["Na+"] creates a
    /// value-initialised map entry when none exists, and gibbs_free_energy and
    /// absorption_width are recalculated when selected through PrintFlags.
    void print(std::ostream& out, bool analysis=false)
      {
      if (analysis) out << "Dataset entry: ";
      // Data line, same column order as print_head()
      out << sequence.first << "/" << sequence.second 
          << " " << alpha
          << " " << salt_concentration["Na+"]
          << " " << species_concentration
          << " " << temperature  
          << " " << enthalpy 
          << " " << entropy;
      CERR_DEBUG(DSQI_PRTFLG) << "PrintFlags=[" << PrintFlags << "]" << std::endl;
      if (reg_match(PrintFlags,"gibbs_free_energy")) 
        {
        // An optional "@<number>" after the flag is passed on as temperature argument,
        // otherwise the default argument of calculate_gibbs_free_energy() applies
        std::string tstr = reg_match_string(PrintFlags,"gibbs_free_energy@(.+)");
        if (not tstr.empty()) calculate_gibbs_free_energy(atof(tstr.c_str()));
        else calculate_gibbs_free_energy();
        out << " " << gibbs_free_energy; 
        }
      if (reg_match(PrintFlags,"absorption_width")) 
        {
        calculate_absorption_width();
        out << " " << absorption_width; 
        }
      out << " " << prediction_method 
          << " " << thermal_equivalence
          << " " << thermal_index
          << " " << partition_function
          << " " << helmholtz_energy;
      // Optional trailing fields: "&<group_key>" when non-zero, "#<identification>" when not empty
      if (group_key) out << " &" << group_key;
      if (not identification.empty()) out << " #" << identification;
      out << std::endl;
      if (analysis)
        {
        out << "* General characteristics " << std::endl;
        if (not from_file.empty()) out << "From_file: " << from_file << std::endl;
        out << "Salt_concentration_[Na+]: " << salt_concentration["Na+"]  << std::endl;
        out << "Ct_concentration: " << species_concentration << std::endl;
        if (group_key) out << "Group_key: " << group_key << std::endl;
        if (not identification.empty()) out << "Identification: " << identification << std::endl;
        out << "Length: " << BP_number << " "
            << "Has_CG: " << Has_CG << " "
            << "Is_AT_only: " << Is_AT_only << " "
            << "Self_complementary: " << Self_complementary << " "
            << "CG_content: " << CG_content << " "
            << "fCG: " << fCG << " "
            << " " << std::endl;
        out << "* Terminal characteristics " << std::endl;
        out << "Terminal NN at 5: " << Terminal5_NN << std::endl;
        out << "Terminal NN at 3: " << Terminal3_NN << std::endl;
        out << "Terminal NN (symmetry reduced) at 5: " << Terminal5_NN_reduced << std::endl;
        out << "Terminal NN (symmetry reduced) at 3: " << Terminal3_NN_reduced << std::endl;
        out << "Terminal base pairs at 5: " << Terminal5_bp << std::endl;
        out << "Terminal base pairs at 3: " << Terminal3_bp << std::endl;
        out << "Terminal base pairs (BP symmetry reduced) at 5: " << Terminal5_bp_reduced << std::endl;
        out << "Terminal base pairs (BP symmetry reduced) at 3: " << Terminal3_bp_reduced << std::endl;
        out << "Terminal_5TA3: " << Terminal_5TA3 << " "
            << "Terminal_AU: " << Terminal_AU << " "
            << "Terminal_AT: " << Terminal_AT << " "
            << "Terminal_CG: " << Terminal_CG << std::endl;
        out << "Terminal_dArU: " << Terminal_dArU << " "
            << "Terminal_dTrA: " << Terminal_dTrA << " "
            << "Terminal_dCrG: " << Terminal_dCrG << " "
            << "Terminal_dGrC: " << Terminal_dGrC << std::endl;
            
        // The periodicity NN is only filled in by insert_sequence() for periodic sequences
        if (Periodic)
          {
          out << "Periodicity_NN: " << Periodicity_NN << " only: " << NN_periodicity_only << std::endl;
          out << "ExactPeriodicity_NN: " << ExactPeriodicity_NN << " only: " << NN_periodicity_only << std::endl;
          }
        
        out << "Original_sequence  : "  << Exact_duplex.formatted_string()  << std::endl;
        out << "Symmetry_reduced   : "  << Symm_reduced_duplex.formatted_string()   << std::endl;
        out << "* Sequence decomposition: base pair list" << std::endl;
        out << "Exact_duplex       : "  << Exact_duplex  << " (" << Exact_duplex.size() << ")" << std::endl;
        out << "BP_reduced_duplex  : "  << BP_reduced_duplex  << " (" << BP_reduced_duplex.size() << ")"   << std::endl;
        out << "* Sequence decomposition: nearest-neighbour list" << std::endl;
        out << "Exact_neighbours   : "   << Exact_neighbours  << " (" << Exact_neighbours.size() << ")"   <<  std::endl;
        out << "Reduced_neighbours : " << Reduced_neighbours  << " (" << Reduced_neighbours.size() << ")"   <<  std::endl;
        out << "* Sequence decomposition: intra-strand neighbours list" << std::endl;
        out << "Exact_strandpairs  : " << Exact_strandpairs   << " (" << Exact_strandpairs.size() << ")"   <<  std::endl;
        out << "Reduced_strandpairs: " << Reduced_strandpairs << " (" << Reduced_strandpairs.size() << ")"   <<  std::endl;
        out << "* Sequence elements (symmetry reduced, strand-pairs reduced to 5'->3' direction)" << std::endl;
        // No std::endl after the sets below.
        // NOTE(review): presumably the ReferenceSet stream operator ends the line itself - confirm
        out << "Basepair_set       : (" << Basepair_set.size()   << ") " << Basepair_set;
        out << "Reduced_neighbours_set     : (" << Reduced_neighbours_set.size() << ") " << Reduced_neighbours_set;

        // Attention: this if has no braces and guards only the single statement that follows
        if (Periodic)
        out << "NPNeighbours_set   : (" << NPNeighbours_set.size() << ") " << NPNeighbours_set;

        out << "Exact_neighbours_set     : (" << Exact_neighbours_set.size() << ") " << Exact_neighbours_set;
        out << "StrandPair_set     : (" << StrandPair_set.size() << ") " << StrandPair_set ;
        // Trimers in the order they were generated by apply_equivalence_map()
        typename duplex_deque_type::iterator tri;
        out << "Trimers_list       : (" << Trimers_list.size() << ") ";
        for (tri=Trimers_list.begin(); tri != Trimers_list.end(); tri++)
          {
          out << (std::string)(*tri) << " ";
          }
        // Distinct trimers with their number of occurrences, printed as trimer=count
        typename trimers_map_type::iterator trim;
        out << std::endl;
        out << "Trimers_set        : (" << Trimers_map.size() << ") ";
        for (trim=Trimers_map.begin(); trim != Trimers_map.end(); trim++)
          {
          out << (std::string)(trim->first) << "=" << trim->second << " ";
          }
        out << std::endl << std::endl;
              
        }
      }
      
    //Prints in the same format as a dataset
    void print_salt_adjusted_tm(std::ostream& out)
      {
      // Column order: main strand, complementary strand, adjusted temperature,
      // target salt concentration, species concentration
      out << sequence.first << " " << sequence.second << " ";
      out << temperature.adjusted << " " << Target_salt_ct << " ";
      out << species_concentration << std::endl;
      }

      
    /// \brief Decomposes Exact_duplex into trimers and applies the equivalence rules to its base pairs.
    ///
    /// For every base pair a trimer (previous, central and next base pair) is built, with a
    /// pseudo base pair made of underscores padding both ends. The trimer is reduced to its
    /// smallest duplex symmetry and looked up in *pEquivalenceMap; if no exact rule exists the
    /// regular expressions of *pReMap are tried and the first match is used. The base pair
    /// version of the rule found is then written to the central base pair in Exact_duplex,
    /// BP_reduced_duplex and Symm_reduced_duplex. All trimers are recorded in Trimers_list
    /// and counted in Trimers_map.
    ///
    /// Nothing but clearing the trimer containers happens when pEquivalenceMap is null or the
    /// duplex is empty. Regular expressions are skipped when pReMap is null, and matches are
    /// not recorded when pMatchedEquivalenceMap is null.
    inline void apply_equivalence_map(void)
      {
      // The trimers describe the current duplex only. Without this, calling insert_sequence()
      // again on the same object would append to the trimers of the previous sequence.
      Trimers_list.clear();
      Trimers_map.clear();

      //Generates a list of trimers which includes both ends, 
      //for example CGGCUG/GUCGGC will result in UG_/GC_ CGG/GUC GCU/CGG GCU/CGG CGG/GUC UG_/GC_
      //note: this requires underscore (_) to be included in NucleotideDictionary
      if (pEquivalenceMap == NULL) return;

      const int duplex_length=static_cast<int>(Exact_duplex.size());
      if (duplex_length == 0) return; // no trimers to build, and Exact_duplex[0] below would not exist

      // Pseudo base pair padding the two ends, it does not change while we loop.
      // For hybrids the sugar strings of the first base pair are prepended to the underscores.
      std::string termstr("__");
      if (Exact_duplex.is_hybrid()) termstr=Exact_duplex[0].first.sugar_string() + "_" + Exact_duplex[0].second.sugar_string() + "_";
      BasePair<>  terminal(termstr);

      // version_list[i] receives the version for base pair i, entries stay empty when no rule applies
      std::deque<typename duplex_type::base_pair_type::version_type> version_list(Exact_duplex.size());
      for(int di=-1; di < (duplex_length-1); di++)
        {
        // The trimer is centred on base pair di+1
        duplex_type tmp;
        if (di < 0) tmp.push_back(terminal);
        else        tmp.push_back(Exact_duplex[di]);
        tmp.push_back(Exact_duplex[di+1]);
        if (di >= (duplex_length-2)) tmp.push_back(terminal);
        else        tmp.push_back(Exact_duplex[di+2]);
        tmp.reduce_to_smallest_duplex_symmetry();

        equivalence_map_type::iterator eq=pEquivalenceMap->find(tmp);
        if (eq != pEquivalenceMap->end())
          {
          // Exact rule
          BasePair<> rep(eq->second);
          version_list[di+1]=rep.Version;
          }
        else if (pReMap != NULL)
          {
          // No exact rule, try the regular expressions; the first match wins, further matches only warn
          bool matched=false;
          std::string first_match;
          for(auto& re : *pReMap) // by reference, no need to copy each rule
            {
            if (not reg_match(tmp,re.first)) continue;

            if (matched)
              {
              CERR_WARN(WIATAM) << "In additon to /" << first_match << " regex /" << re.first << "/ also matches " << (std::string)tmp << std::endl;
              continue;
              }

            if (pMatchedEquivalenceMap != NULL)
              {
              auto& matched_trimers=(*pMatchedEquivalenceMap)[re.first];
              matched_trimers.push_back(tmp);
              matched_trimers.sort(); //sort is necessary as unique() only removes consecutive duplicates
              matched_trimers.unique();
              }
            BasePair<> rep(re.second);
            version_list[di+1]=rep.Version;
            matched=true;
            first_match=re.first;

            //check if matched trimer has the right central base-pair
            rep.reduce_to_smallest_symmetry();
            duplex_type trimer(tmp,base_pair_type::simplify_symmetry);
            if (rep.formatted_string_no_version() != trimer[1].formatted_string_no_version())
              {
              CERR_ERROR(ERRCBRMT) << "Regex /" << re.first << "/ matched the trimer \"" << (std::string)tmp << "\" which has a central base pair=" << trimer[1].formatted_string_no_version() << ", however this is different from the central base of rule " << re.second << ", check your regular expression for mistakes" << std::endl;
              CERR_TERM
              }
            }
          }

        Trimers_list.push_back(tmp);
        Trimers_map[tmp]++;
        }

      // Copy the versions found to the three representations of the duplex
      for(int di=0; di < duplex_length; di++)
        {
        if (version_list[di].length() > 0)
          {
          Exact_duplex[di].Version=version_list[di];
          BP_reduced_duplex[di].Version=version_list[di];
          Symm_reduced_duplex[di].Version=version_list[di];
          }
        }
      }
      
    // This is a crucial function: here we take the two sequence strings, one for the main strand and
    // one for the complementary strand, and work out all equivalences, base pairs and nearest-neighbours,
    // determine if there are AT or CG terminals, etc.
    inline void insert_sequence(const std::string& seq, const std::string& comp=std::string())
      {
      // seq : main strand, or the complete sequence description when comp is empty
      // comp: complementary strand, optional; when given the duplex is built from seq+"/"+comp
      //
      // Attention: the member sequence is not modified here, and the statement order matters
      // because later steps are derived from the containers filled by earlier ones.

      // Arrays allocated for the previous sequence are no longer valid
      clear_pointers_for_different_sequence();
      
      // --- Duplex in its three flavours: exact, base-pair reduced and fully symmetry reduced
      Exact_duplex.clear();
      BP_reduced_duplex.clear();
      Symm_reduced_duplex.clear();
      CERR_DEBUG(DSQI_INSSEQ) << "seq=" << seq << " comp=" << comp << std::endl;
     if(comp != std::string())
        {
        Exact_duplex=Duplex<>(seq+"/"+comp,BasePair<>::do_not_simplify_symmetry);
        BP_reduced_duplex=Duplex<>(seq+"/"+comp,BasePair<>::simplify_symmetry);
        Symm_reduced_duplex=Duplex<>(seq+"/"+comp,BasePair<>::do_not_simplify_symmetry);
        Symm_reduced_duplex.reduce_to_smallest_duplex_symmetry();
        }
      else 
        {
        // Same as above, but seq alone describes the duplex
        Exact_duplex=Duplex<>(seq,BasePair<>::do_not_simplify_symmetry);
        BP_reduced_duplex=Duplex<>(seq,BasePair<>::simplify_symmetry);
        Symm_reduced_duplex=Duplex<>(seq,BasePair<>::do_not_simplify_symmetry);
        Symm_reduced_duplex.reduce_to_smallest_duplex_symmetry();
        }
        
      length=BP_number=Exact_duplex.size();
      
      // This has to come before the neighbour analysis since it changes the base pair versions of the duplexes
      apply_equivalence_map(); //this is the map where the equivalence rules read from .par files are applied

      // --- Nearest-neighbour sequences, Periodic is set before the duplex is assigned
      Reduced_neighbours.clear();
      Reduced_neighbours.Periodic=Periodic; // This avoids that the first and last base-pair form a nearest-neighbour pair

      Reduced_neighbours=Exact_duplex;
      Reduced_neighbours.reduce_to_smallest_symmetry();
     
      Exact_neighbours.clear();
      Exact_neighbours.Periodic=Periodic; // This avoids that the first and last base-pair form a nearest-neighbour pair
      Exact_neighbours=Exact_duplex; //here we do not call reduce_to_smallest_symmetry()
      
      // --- Sets of distinct base pairs and nearest-neighbours
      Basepair_set.clear();
      Basepair_set.Symmetry_action=BasePair<>::simplify_symmetry;
      Basepair_set.analyse(BP_reduced_duplex);

      Reduced_neighbours_set.clear();
      Reduced_neighbours_set.analyse(Reduced_neighbours);
      Reduced_neighbours_set_size=Reduced_neighbours_set.size();
      
      Exact_neighbours_set.clear();
      Exact_neighbours_set.analyse(Exact_neighbours);

      // --- Strand pairs, both flavours are derived from Exact_neighbours
      Exact_strandpairs.clear();
      Exact_strandpairs.Symmetry_action=StrandPair<>::do_not_simplify_symmetry;
      Exact_strandpairs = Exact_neighbours;
      
      Reduced_strandpairs.clear();
      Reduced_strandpairs.Symmetry_action=StrandPair<>::simplify_symmetry;
      Reduced_strandpairs = Exact_neighbours;
      
      StrandPair_set.clear();
      StrandPair_set.analyse(Reduced_strandpairs);
      
      if (Periodic)//when periodic we also generate the non-periodic info
        {
        NPReduced_neighbours.clear();
        NPReduced_neighbours.Periodic=false; // This avoids that the first and last base-pair form a nearest-neighbour pair
        NPReduced_neighbours=Exact_duplex;
        NPReduced_neighbours.reduce_to_smallest_symmetry();
        NPExact_neighbours.clear();
        NPExact_neighbours.Periodic=false;
        NPExact_neighbours=Exact_duplex; //here we do not call reduce_to_smallest_symmetry()
        NPNeighbours_set.clear();
        NPNeighbours_set.analyse(NPReduced_neighbours);
        NPExact_strandpairs.clear();
        NPExact_strandpairs.Symmetry_action=StrandPair<>::do_not_simplify_symmetry;
        NPExact_strandpairs = NPExact_neighbours;
        NPReduced_strandpairs.clear();
        NPReduced_strandpairs.Symmetry_action=StrandPair<>::simplify_symmetry;
        NPReduced_strandpairs = NPExact_neighbours;
        NPStrandPair_set.clear();
        NPStrandPair_set.analyse(NPReduced_strandpairs);
        Periodicity_NN=Reduced_neighbours.get_periodicity_neighbour();
        ExactPeriodicity_NN=Exact_neighbours.get_periodicity_neighbour();
        //This test checks if Periodicity_NN only exists due to periodic boundary conditions
        NN_periodicity_only = NPNeighbours_set.find(Periodicity_NN) == NPNeighbours_set.end();
        }
        
      Self_complementary=Exact_duplex.self_complementary();
            
      // --- Terminal base pairs and terminal nearest-neighbours
      // NOTE(review): begin()/rbegin() are dereferenced without checking for an empty duplex,
      // presumably callers never insert an empty sequence - confirm
      Terminal5_bp = *Exact_duplex.begin();
      Terminal3_bp = *Exact_duplex.rbegin();
      Terminal5_bp_reduced = *BP_reduced_duplex.begin();
      Terminal3_bp_reduced = *BP_reduced_duplex.rbegin();
      
      Terminal5_NN = *Exact_neighbours.begin();
      Terminal3_NN = *Exact_neighbours.rbegin();
      Terminal5_NN_reduced = *Reduced_neighbours.begin();
      Terminal3_NN_reduced = *Reduced_neighbours.rbegin();

            
      // Reference base pairs the terminals are compared against with eq_nv()
      BasePair<> AT("AT",BasePair<>::simplify_symmetry),
                 AU("AU",BasePair<>::simplify_symmetry),
                 TA("TA",BasePair<>::simplify_symmetry),//internally represented as AT
                 UA("UA",BasePair<>::simplify_symmetry),//internally represented as AU
                 CG("CG",BasePair<>::simplify_symmetry),
                 GC("GC",BasePair<>::simplify_symmetry),//internally represented as CG
                 dTrA("dTrA",BasePair<>::simplify_symmetry),
                 dArU("dArU",BasePair<>::simplify_symmetry),
                 dCrG("dCrG",BasePair<>::simplify_symmetry),
                 dGrC("dGrC",BasePair<>::simplify_symmetry),
                 rAdT("rAdT",BasePair<>::simplify_symmetry),
                 rUdA("rUdA",BasePair<>::simplify_symmetry),
                 rGdC("rGdC",BasePair<>::simplify_symmetry),
                 rCdG("rCdG",BasePair<>::simplify_symmetry);
                 
      // --- Terminal counters, each one is reset and then ends up as 0, 1 or 2
      Terminal_5TA3=0;
      //This accounts for a 5'T.A3' at the beginning
      if ( eq_nv(Terminal5_bp,TA) ) Terminal_5TA3++;
      //This accounts for a 3'A.T5' at the end
      if ( eq_nv(Terminal3_bp,AT) ) Terminal_5TA3++;

      Terminal_AU=0;
      //This accounts for a AU or UA at the beginning
      if ( eq_nv(Terminal5_bp,AU) or eq_nv(Terminal5_bp,UA) ) Terminal_AU++;
      if ( eq_nv(Terminal3_bp,AU) or eq_nv(Terminal3_bp,UA) ) Terminal_AU++;

      Terminal_AT=0;
      //This accounts for a AT or TA at the beginning
      if ( eq_nv(Terminal5_bp,AT) or eq_nv(Terminal5_bp,TA) ) Terminal_AT++;
      if ( eq_nv(Terminal3_bp,AT) or eq_nv(Terminal3_bp,TA) ) Terminal_AT++;

      Terminal_CG=0;
      //This accounts for a CG or GC at the beginning
      if ( eq_nv(Terminal5_bp,CG) or eq_nv(Terminal5_bp,GC) ) Terminal_CG++;
      if ( eq_nv(Terminal3_bp,CG) or eq_nv(Terminal3_bp,GC) ) Terminal_CG++;

      // DNA-RNA hybrid terminals
      Terminal_dArU=0;
      if ( eq_nv(Terminal5_bp,dArU) or eq_nv(Terminal5_bp,rUdA) ) Terminal_dArU++;
      if ( eq_nv(Terminal3_bp,dArU) or eq_nv(Terminal3_bp,rUdA) ) Terminal_dArU++;

      Terminal_dTrA=0;
      if ( eq_nv(Terminal5_bp,dTrA) or eq_nv(Terminal5_bp,rAdT) ) Terminal_dTrA++;
      if ( eq_nv(Terminal3_bp,dTrA) or eq_nv(Terminal3_bp,rAdT) ) Terminal_dTrA++;

      Terminal_dCrG=0;
      if ( eq_nv(Terminal5_bp,dCrG) or eq_nv(Terminal5_bp,rGdC) ) Terminal_dCrG++;
      if ( eq_nv(Terminal3_bp,dCrG) or eq_nv(Terminal3_bp,rGdC) ) Terminal_dCrG++;

      Terminal_dGrC=0;
      if ( eq_nv(Terminal5_bp,dGrC) or eq_nv(Terminal5_bp,rCdG) ) Terminal_dGrC++;
      if ( eq_nv(Terminal3_bp,dGrC) or eq_nv(Terminal3_bp,rCdG) ) Terminal_dGrC++;

      // --- CG content: a base pair counts as CG if its version-less string is in CG_equivalent_set,
      // and any base pair not in AT_equivalent_set clears Is_AT_only
      Has_CG=false;
      Is_AT_only=true;
      CG_content=0;
      for (auto it : BP_reduced_duplex) //Attention: do not use BP_reduced_duplex for NN analysis
        {
        std::string bp=it.formatted_string_no_version();
        if (CG_equivalent_set.find(bp) != CG_equivalent_set.end()) 
          {
          Has_CG=true;
          CG_content++;
          }
        if (not (AT_equivalent_set.find(bp) != AT_equivalent_set.end()) ) Is_AT_only=false; 
        }
      // The division is carried out in value_type, not in int
      fCG=CG_content/static_cast<value_type>(BP_number);
        
      }
      

    /// \brief Checks for selfcomplementarity and sets alpha value
    ///
    /// \returns 1 if self-complementary, 4 otherwise
    inline value_type determine_alpha(void)
      {
      // Load both strands, the second one is reversed before the comparison
      NucleotideSequence<> main_strand(sequence.first.length());
      NucleotideSequence<> comp_strand(sequence.second.length());
      main_strand << sequence.first;
      comp_strand << sequence.second;
      comp_strand.reverse();

      // Identical strings mean self-complementary, which gives alpha=1, otherwise alpha=4
      const bool same_strands = ((std::string)main_strand == (std::string)comp_strand);
      alpha = same_strands ? 1.0 : 4.0;
      return alpha;
      }

    /// \brief Adjust temperature to current reference concentration
    ///
    /// \attention This temperature adjustment is too approximate and should not be used.
    inline void adjust_temperature(void)
      {
      if (Ref_concentration == species_concentration)
        {
        // Already at the reference concentration, the measured value is taken as it is
        temperature.adjusted=temperature.measured;
        }
      else
        {
        // Scale by the ratio of the logarithms of the two concentrations
        // Note: it is correct to use the same alpha for both concentrations
        const auto log_species  =log(species_concentration*Concentration_factor/alpha);
        const auto log_reference=log(Ref_concentration*Concentration_factor/alpha);
        temperature.adjusted=temperature.measured*log_species/log_reference;
        }

      // The salt correction goes onto the input temperature unless it was requested for the prediction
      if (not Prediction_with_salt_correction) apply_salt_correction();
      }

    // Calculates new Tm for given Ct, but does not update any sequence info
    // ct in \micro M (1e-6 M)
    inline value_type calculate_temperature_from_gibbs(const value_type &ct)
      {
      // ct is only read, so it is taken by const reference: existing callers
      // passing an lvalue keep working, and constants/temporaries are accepted too.
      //
      // enthalpy.measured is multiplied by 1000 to bring it to the same scale
      // as entropy.measured and R (kcal/mol -> cal/mol, see the unit notes on
      // calculate_gibbs_free_energy); 273.15 is subtracted from the quotient
      // (presumably Kelvin -> Celsius).
      // NOTE(review): no guard against a zero denominator
      // (entropy.measured + R*log(ct*Concentration_factor/alpha) == 0).
      const value_type enthalpy_scaled = enthalpy.measured*1000.0;
      return enthalpy_scaled/(entropy.measured+R*log(ct*Concentration_factor/alpha))-273.15;
      }

    // Sets the DeltaG assuming enthalpy in kcal/mol and entropy in cal/(K.mol); the result is in kcal/mol
    inline void calculate_gibbs_free_energy(value_type T=37.0)
      {
      // The temperature argument is shifted by 273.15 before use.
      const value_type kelvin = T + 273.15;

      // DeltaG = DeltaH - T*DeltaS; the entropy term is divided by 1000 so
      // that it is on the same scale as the enthalpy. Evaluated separately
      // for the measured, adjusted and predicted values.
      gibbs_free_energy.measured  = enthalpy.measured  - kelvin*entropy.measured/1000.0;
      gibbs_free_energy.adjusted  = enthalpy.adjusted  - kelvin*entropy.adjusted/1000.0;
      gibbs_free_energy.predicted = enthalpy.predicted - kelvin*entropy.predicted/1000.0;
      }
      
   // Calculates the width of the absorption curve 
    inline void calculate_absorption_width(void)
      {
      // For each of measured/adjusted/predicted: a zero (default-constructed)
      // enthalpy yields a zero width, which also avoids dividing by zero;
      // otherwise the width is -3/2 * R * (T+273.15)^2 / (enthalpy*1000).

      if (enthalpy.measured == value_type())
        absorption_width.measured = value_type();
      else
        absorption_width.measured = -3.0/2.0*R*pow(temperature.measured+273.15,2)/(enthalpy.measured*1000);

      if (enthalpy.adjusted == value_type())
        absorption_width.adjusted = value_type();
      else
        absorption_width.adjusted = -3.0/2.0*R*pow(temperature.adjusted+273.15,2)/(enthalpy.adjusted*1000);

      if (enthalpy.predicted == value_type())
        absorption_width.predicted = value_type();
      else
        absorption_width.predicted = -3.0/2.0*R*pow(temperature.predicted+273.15,2)/(enthalpy.predicted*1000);

      // Debug trace of the inputs and of the predicted width.
      CERR_DEBUG(DSQI_CLABWD) << "enthalpy.measured=" << enthalpy.measured
                              << " temperature.measured=" << temperature.measured
                              << " enthalpy.predicted=" << enthalpy.predicted
                              << " temperature.predicted=" << temperature.predicted
                              << " absorption_width.predicted=" << absorption_width.predicted << std::endl;
      }
      
      
    // Updates Tm for default Ct, updates sequence info
    inline void set_temperature_from_gibbs(void)
      {
      temperature.measured=calculate_temperature_from_gibbs(species_concentration);
      temperature.adjusted=temperature.measured;
      if (not Prediction_with_salt_correction) apply_salt_correction();
      }
            
    /// \brief Applies a [Na+] salt correction to the melting temperature.
    ///
    /// The starting temperature is temperature.predicted when
    /// Prediction_with_salt_correction is set and temperature.measured otherwise.
    /// On a performed correction the result is stored in Corrected_Tm_map[scheme],
    /// Salt_corrected is set to true, and the result is written to
    /// temperature.predicted (prediction mode) or temperature.adjusted.
    /// For scheme == nocorrection, or when base_salt equals target_salt, the
    /// starting temperature is returned and temperature is left untouched.
    ///
    /// \param base_salt   salt concentration the starting temperature refers to, in mM
    /// \param target_salt salt concentration to correct to, in mM; if zero,
    ///                    salt_concentration["Na+"] of this sequence is used instead
    /// \param scheme      salt correction scheme to apply
    /// \returns the corrected temperature, or the starting temperature if no correction was applied
    ///
    /// \attention An error is raised through CERR_ERROR/CERR_TERM when no
    /// non-zero target concentration is available, when nakano99 is requested
    /// for anything other than 1000 -> 100, or when santalucia98 is requested
    /// with zero enthalpy.
    /// NOTE(review): base_salt == 0 is not guarded here and leads to a division by zero in the log ratios.
    value_type apply_salt_correction(const value_type base_salt, //in mM same as SequenceInfo
                                     value_type target_salt,
                                     const salt_correction_scheme_type scheme=nocorrection)
      {
      value_type f1=0, f2=0, f3=0;

      Salt_corrected=false;
      value_type tm;
      if (Prediction_with_salt_correction) tm=temperature.predicted; else tm=temperature.measured;

      if (scheme == nocorrection) return tm;

      if (base_salt == target_salt) //if concentrations happen to be the same, do not adjust
        {
        Corrected_Tm_map[scheme] = tm;
        return tm;
        }

      // A zero target means "use the concentration stored with this sequence".
      if (target_salt == value_type())
        {
        if (salt_concentration["Na+"] == value_type())
          {
          CERR_ERROR(ERRTSCZ) << "target salt concentration, including data file, is zero, can not perform salt correction" << std::endl; 
          CERR_TERM
          }
        else target_salt = salt_concentration["Na+"];
        }

      value_type Base_salt_concentration_M = base_salt/1000; //converting mM -> M
      value_type Target_salt_concentration_M = target_salt/1000;

      // Terms shared by the correction formulas below.
      value_type ln_target_base  = log(Target_salt_concentration_M/Base_salt_concentration_M);
      value_type log_target_base = log10(Target_salt_concentration_M/Base_salt_concentration_M);
      value_type sqr_diff        = pow(log(Target_salt_concentration_M),2) - pow(log(Base_salt_concentration_M),2);

      // Initialised to the uncorrected temperature so that a scheme not handled
      // by the final switch can never return an indeterminate value.
      value_type corrected_tm = tm;

      // Built-in factors of the published schemes.
      switch (scheme)
        {
        case owczarzy04eq19: f1=-3.22;    f2=6.39;      f3=0;        break;
        case owczarzy04eq20: f1=3.85e-5;  f2=-6.18e-5;  f3=0;        break;
        case owczarzy04eq21: f1=-4.62;    f2=4.52;      f3=-0.985;   break;
        case owczarzy04eq22: f1=4.29e-5;  f2=-3.95e-5;  f3=9.4e-6;   break;
        case chen13eq19:     f1=-1.842;   f2=4.314;     f3=0;        break;
        case chen13eq20:     f1=2.297e-5; f2=-4.575e-5; f3=0;        break;
        case chen13eq21:     f1=-1.842;   f2=2.675;     f3=-0.7348;  break;
        case chen13eq22:     f1=2.297e-5; f2=-2.886e-5; f3=7.575e-6; break;
        case schildkraut65:  f2=16.6; break;
        case santalucia98:   f1=0.368; break;
        case nocorrection: 
          f1=f2=f3=0; break;
        }

      // Non-zero "Na+:f1", "Na+:f2", "Na+:f3" entries of the parameter map
      // override the built-in factors.
      if (pparameter_map)
        {
        value_type f1f=pparameter_map->get_if_exists("Na+:f1");
        value_type f2f=pparameter_map->get_if_exists("Na+:f2");
        value_type f3f=pparameter_map->get_if_exists("Na+:f3");
        if (f1f) f1=f1f;
        if (f2f) f2=f2f;
        if (f3f) f3=f3f;
        }

      switch (scheme)
        {
        //tm_ln and rec_tm_ln have no built-in factors: they rely entirely on
        //f1, f2 and f3 supplied through the parameter map, so warn if none arrived
        case tm_ln:
          if ( (f1 == 0) and (f2 == 0) and (f3 == 0))
            CERR_WARN(WF1F2F3Z) << "factors f1,f2 and f3 are all zero for salt correction scheme tm_ln" << std::endl;
          break;
        case rec_tm_ln:
          if ( (f1 == 0) and (f2 == 0) and (f3 == 0))
            CERR_WARN(WF1F2F3Z) << "factors f1,f2 and f3 are all zero for salt correction scheme rec_tm_ln" << std::endl;
          break;
        }

      value_type enth=enthalpy.predicted;

      switch (scheme)
        {
        //schildkraut65 for DNA https://doi.org/10.1002/bip.360030207
        case schildkraut65: 
                            Corrected_Tm_map[scheme] = corrected_tm = tm + f2*log_target_base;
                            Salt_corrected=true;
                            break;
        //owczarzy04 https://doi.org/10.1021/bi034621r for DNA Table 3
        //linear form: the correction is added directly to tm
        case tm_ln:
        case owczarzy04eq19: 
        case owczarzy04eq21:
        case chen13eq19    : //chen13 https://doi.org/10.1021/bi4008275
        case chen13eq21    :
                            Corrected_Tm_map[scheme] =  corrected_tm = tm +(f1*fCG+f2)*ln_target_base +f3*sqr_diff;
                            Salt_corrected=true;
                            break;

        //reciprocal form: the correction is added to 1/(tm+273.15)
        case rec_tm_ln:
        case owczarzy04eq20: 
        case owczarzy04eq22:
        case chen13eq20    :
        case chen13eq22    :
                            corrected_tm = 1/(tm+273.15) +(f1*fCG+f2)*ln_target_base +f3*sqr_diff;
                            corrected_tm = 1/corrected_tm-273.15;
                            Corrected_Tm_map[scheme] = corrected_tm;
                            Salt_corrected=true;
                            break;
        case nakano99      : //nakano99 Eq. (4) https://doi.org/10.1093/nar/27.14.2957
                            //the fit is only valid for exactly base=1000 and target=100,
                            //so a deviation in EITHER value must be rejected
                            if ( (target_salt != 100) or (base_salt != 1000))
                              {
                              CERR_ERROR(ERRSCNWC) << "salt correction of type nakano99 only works for reducing from 1000 to 100 [Na+]" << std::endl; 
                              CERR_TERM;
                              }
                            corrected_tm = 0.876 * tm -5.148; 
                            Corrected_Tm_map[scheme] = corrected_tm;
                            Salt_corrected=true;
                            break;

        case santalucia98:  //falls back to the measured enthalpy when no prediction is available
                            if (enth == value_type()) enth = enthalpy.measured;
                            if (enth == value_type())
                              {
                              CERR_ERROR(ERRCCSC) << "Cannot calculate salt correction with zero enthaply for santalucia98" << std::endl;
                              CERR_TERM
                              }
                            enth *= 1000; //needs to be in cal/mol while the unit entered is kcal/mol
                            corrected_tm = 1/(tm+273.15) + (f1*length/enth)*ln_target_base;
                            corrected_tm = 1/corrected_tm-273.15;
                            Corrected_Tm_map[scheme] = corrected_tm;
                            Salt_corrected=true;
                            break;

        case nocorrection:  corrected_tm = tm;
                            break;
        }

      if (Prediction_with_salt_correction) 
        temperature.predicted=corrected_tm; //ATTENTION: this replaces the predicted value
      else temperature.adjusted=corrected_tm;

      return corrected_tm;

      }
    
   inline value_type apply_salt_correction(void)
      {
      // Convenience overload: the sequence's own "Na+" concentration is the
      // base, the stored target concentration and scheme supply the rest.
      const value_type own_salt = salt_concentration["Na+"];
      return apply_salt_correction(own_salt, Target_salt_ct, Salt_correction_scheme);
      }
      
    // if base_salt==0, apply with default values
    // if Target_salt_ct==0 use sequence salt_concentration as Target_salt_ct
    inline value_type apply_salt_correction(const value_type& base_salt, parameter_map_type* ppm=NULL)
      {
      // A non-null parameter map replaces the stored one and stays in effect
      // after this call.
      if (ppm) pparameter_map=ppm;

      // No base concentration given: fall back to the stored defaults.
      if (base_salt == value_type())
        return apply_salt_correction();

      // No target concentration configured: correct towards this sequence's
      // own "Na+" concentration.
      if (Target_salt_ct == value_type())
        return apply_salt_correction(base_salt,salt_concentration["Na+"],Salt_correction_scheme);

      return apply_salt_correction(base_salt,Target_salt_ct,Salt_correction_scheme);
      }

  };//SequenceInfo
  
  
};//namespace
#endif
