// $Id: BasePair.h 1369 2024-11-29 14:20:00Z ge $
/// \file BasePair.h
/// \brief contains the BasePair class
///
/// $Revision: 1369 $
/// \author Gerald Weber

#ifndef GBC_EXP_BASEPAIR_H
#define GBC_EXP_BASEPAIR_H "$Id: BasePair.h 1369 2024-11-29 14:20:00Z ge $"

#include "Nucleotide.h"
#include <algorithm>
#include "RegexPattern.h"

namespace gbc {

/// \brief Forms a base pair out of two Nucleotides.
///
/// This class derives from std::pair<Nucleotide<_RepTp,_IndexTp>,Nucleotide<_RepTp,_IndexTp> >
/// and adds an optional version label, a symmetry policy and helpers that
/// produce string representations of the pair.
///
/// \tparam _RepTp   first template argument passed on to Nucleotide, exposed as internal_type
/// \tparam _IndexTp second template argument passed on to Nucleotide
template<class _RepTp=char, class _IndexTp=char>
class BasePair: public std::pair<Nucleotide<_RepTp,_IndexTp>,Nucleotide<_RepTp,_IndexTp> >
  {
  public:
    typedef _RepTp internal_type;                                  ///< type of _RepTp
    typedef Nucleotide<_RepTp,_IndexTp> nucleotide_type;           ///< type of Nucleotide<_RepTp,_IndexTp>
    // Built from nucleotide_type so that it is always exactly the base class of BasePair,
    // also when _IndexTp is not the default second argument of Nucleotide.
    typedef std::pair<nucleotide_type,nucleotide_type> pair_type;  ///< the std::pair base class
    typedef std::pair<internal_type,internal_type> internal_pair_type; ///< pair of the two internal representations
    typedef std::string version_type;                              ///< type used for Version

    version_type Version;     ///< Version of base pair eg AT^a
    static const char version_char='^'; ///< character placed between the base pair string and its Version

    /// Selects if the Version is appended to the string representations.
    enum IncludeVersionType { add_version = 1, no_version = 0};

    /// Selects if reduce_to_smallest_symmetry is allowed to swap the Nucleotides.
    enum SymmetryActionsType {do_not_simplify_symmetry = 1 /* formerly context_dependent=true */,
                              simplify_symmetry        = 0 /* formerly not_context_dependent=false */};

    static const SymmetryActionsType default_symmetry_action=simplify_symmetry;

    SymmetryActionsType Symmetry_action; ///< Base pairs may be simplified according to their 5'->3' directions, for example TA would become AT
                                         ///< if Symmetry_action=simplify_symmetry
    std::string separation_string;       ///< Separator string for the string representation.

  /// \brief Constructs a BasePair with default constructed Nucleotides, no Version and no separator.
  BasePair(void): Version(), Symmetry_action(default_symmetry_action), separation_string() {}

  /// \brief Constructs a BasePair from its named Nucleotides.
  ///
  /// This constructor works by converting the char representation
  /// to a type Nucleotide which is then assigned to the BasePair.
  /// \param fst             char used to construct the first Nucleotide
  /// \param scnd            char used to construct the second Nucleotide
  /// \param symmetry_action value stored in Symmetry_action
  /// \param version         value stored in Version
  BasePair(char fst, char scnd, SymmetryActionsType symmetry_action=default_symmetry_action, std::string version=std::string())
   : pair_type(nucleotide_type(fst),nucleotide_type(scnd)), Version(version),
     Symmetry_action(symmetry_action), separation_string()
   {}

  /// \brief Constructs a BasePair from one named Nucleotide and its complement.
  ///
  /// Both Nucleotides are constructed from fst, then complementary() is
  /// called on the second one.
  /// \param fst             char used to construct both Nucleotides
  /// \param symmetry_action value stored in Symmetry_action
  BasePair(char fst, SymmetryActionsType symmetry_action=default_symmetry_action)
   : pair_type(nucleotide_type(fst),nucleotide_type(fst)), Version(),
     Symmetry_action(symmetry_action), separation_string()
   {
   this->second.complementary();
   }

  /// \brief Constructs a BasePair from its string representation.
  ///
  /// The string is handed over to string_interpretation().
  /// \attention The result of string_interpretation() is not checked here, a string
  /// which can not be interpreted leaves the Nucleotides default constructed.
  /// \param bpname          string representation of the base pair
  /// \param symmetry_action value stored in Symmetry_action
  BasePair(std::string bpname, SymmetryActionsType symmetry_action=default_symmetry_action)
   : Version(), Symmetry_action(symmetry_action), separation_string()
   {
   string_interpretation(bpname);
   }

  /// \brief Constructs a BasePair from Nucleotide and its complement.
  ///
  /// Both Nucleotides are initialised from fst, then complementary() is
  /// called on the second one.
  template<class _RepTp2>
  BasePair(const Nucleotide<_RepTp2>& fst, SymmetryActionsType symmetry_action=default_symmetry_action)
    : pair_type(fst,fst), Version(),
      Symmetry_action(symmetry_action), separation_string()
    {
    this->second.complementary();
    }

  /// \brief Assigns two Nucleotide objects to form a BasePair.
  template<class _RepTp2>
  BasePair(const Nucleotide<_RepTp2>& fst, const Nucleotide<_RepTp2>& scnd, SymmetryActionsType symmetry_action=default_symmetry_action)
    : pair_type(fst,scnd), Version(),
      Symmetry_action(symmetry_action), separation_string()
    {}

  /// \brief Assigns a nucleotide, and its complement, to the BasePair.
  ///
  /// Version, Symmetry_action and separation_string are left untouched.
  /// \return reference to this BasePair
  template<class _RepTp2>
  BasePair& operator=(const Nucleotide<_RepTp2>& nuc)
    {
    this->first=nuc;
    this->second=nuc;
    this->second.complementary();

    return *this;
    }

  /// \brief Swaps the two Nucleotides of the Base Pairs.
  void swap(void)
    {
    std::swap(this->first,this->second);
    }

  /// \brief Converts this BasePair to its cheapest symmetry equivalent.
  ///
  /// We apply the symmetry operation of swapping.
  /// Then we retain the one that yields the lowest internal
  /// representation value using the operator<.
  ///
  /// \attention No operation is carried out if Symmetry_action is do_not_simplify_symmetry
  /// \return true if this BasePair was replaced by its swapped equivalent
  inline bool reduce_to_smallest_symmetry(void)
    {
    if (Symmetry_action != simplify_symmetry) return false;

    BasePair swapped=*this;
    swapped.swap();
    if (swapped < *this)
      {
      *this=swapped;
      return true;
      }
    return false;
    }

  /// \brief Interprets a string as a base pair and assigns the result to this BasePair.
  ///
  /// The string is searched with the regular expression BASEPAIR_STRING_INTERPRETATION_PATTERN.
  /// Capture group 1 is passed to string_interpretation() of the first Nucleotide,
  /// capture group 2 to the one of the second Nucleotide and capture group 3 is
  /// stored as Version.
  ///
  /// \param smb string representation of the base pair
  /// \return true only if the pattern was found and both Nucleotides reported success.
  /// If the pattern is not found this BasePair is left unchanged.
  inline bool string_interpretation (const std::string& smb)
    {
    // The pattern never changes, therefore it is compiled only once and not at every call.
    static const boost::regex pattern(BASEPAIR_STRING_INTERPRETATION_PATTERN);
    bool ok1=false, ok2=false;
    boost::smatch found;
    if (boost::regex_search(smb,found,pattern))
      {
      ok1=this->first.string_interpretation(found[1]);
      ok2=this->second.string_interpretation(found[2]);
      Version=found[3];
      }
    return ok1 and ok2;
    }

  /// returns the symbolic representation of the nucleobases, e.g. AT or CT
  ///
  /// This method makes use of the separation_string which by default is set to a
  /// empty string. Possible uses are "/" which would give results such as
  /// "A/T" or "C/G".
  /// \param versionize if add_version, and Version is not empty, version_char and Version are appended
  inline std::string nucleobases_string(IncludeVersionType versionize=add_version) const
    {
    std::string res=this->first.symbol()+separation_string+this->second.symbol();
    append_version(res,versionize);
    return res;
    }

  /// returns the symbolic representation of the nucleosides (sugar+nucleobase), e.g. dAdT or rCdT
  ///
  /// This method makes use of the separation_string which by default is set to a
  /// empty string. Possible uses are "/" which would give results such as
  /// "dA/dT" or "rC/rG".
  /// \param versionize if add_version, and Version is not empty, version_char and Version are appended
  inline std::string nucleosides_string(IncludeVersionType versionize=add_version) const
    {
    std::string res=this->first.nucleoside_string()+separation_string+this->second.nucleoside_string();
    append_version(res,versionize);
    return res;
    }

  /// \brief Removes the Version of this BasePair.
  inline void rm_version(void) {Version.clear();}

  /// \brief Checks if the two Nucleotides have different sugars.
  inline bool is_hybrid(void) const
    {
    return this->first.sugar() != this->second.sugar();
    }

  /// \brief Produces a string which describes the BasePair.
  ///
  /// When both Nucleotides have the same sugar, i.e., both are either DNA or RNA this returns something like AT or CG.
  /// For hybrid DNA-RNA it returns something like dArU or dCrG.
  /// \param versionize passed on to nucleosides_string() or nucleobases_string()
  inline std::string formatted_string(IncludeVersionType versionize=add_version) const
    {
    return is_hybrid() ? nucleosides_string(versionize) : nucleobases_string(versionize);
    }

  /// \brief Produces a string which describes the first Nucleotide.
  ///
  /// For hybrid base pairs this is the nucleoside string, otherwise the nucleobase character.
  inline std::string formatted_first_string(void) const
    {
    return formatted_nucleotide_string(this->first);
    }

  /// \brief Produces a string which describes the second Nucleotide.
  ///
  /// For hybrid base pairs this is the nucleoside string, otherwise the nucleobase character.
  inline std::string formatted_second_string(void) const
    {
    return formatted_nucleotide_string(this->second);
    }

  /// \brief Conversion to std::string, same as formatted_string() including the Version.
  operator std::string(void) const
    {
    return this->formatted_string();
    }

  /// \brief Same as formatted_string() but never appends the Version.
  inline std::string formatted_string_no_version(void) const
    {
    return this->formatted_string(no_version);
    }

  /// \brief Orders two BasePair by their Nucleotides and then by their Version.
  ///
  /// This solves insertion problems with ReferenceSet.
  /// Without overloading this operator bp1 and bp2 may return equal even if they have different versions.
  friend bool operator<(const BasePair& bp1, const BasePair& bp2)
    {
    // References to the std::pair base, the Nucleotides are compared without copying them.
    const pair_type& p1=bp1;
    const pair_type& p2=bp2;
    if (p1 == p2) return bp1.Version < bp2.Version;
    return p1 < p2;
    }

  /// \brief Conversion to a std::pair of the representation members of both Nucleotides.
  operator internal_pair_type(void) const
    {return internal_pair_type(this->first.representation,this->second.representation);}

  /// \brief Returns a new BasePair with the two Nucleotides swapped.
  ///
  /// This method is called axis_swap such that it is later not
  /// confused with stack_swap of the BasePairNeighbours class.
  friend inline BasePair axis_swap(const BasePair& bp)
    {
    BasePair newpair=bp;
    newpair.swap();
    return newpair;
    }

  /// \brief Checks if two base pairs are axis-symmetric to each other.
  /// \attention This is done regardless of the 3'->5' direction by
  /// simply checking the representations. The Version is not compared.
  friend inline bool axis_symmetry(const BasePair& bp1, const BasePair& bp2)
    {
    return   (bp1.first  == bp2.second)
          && (bp1.second == bp2.first);
    }

  /// \brief Checks if the two nucleotides are the same.
  friend inline bool check_symmetry(const BasePair& bp)
    {
    return (bp.first == bp.second);
    }


  /// Inserter for printing BasePair symbols
  ///
  /// This method first converts the BasePair to its string representation.
  friend std::ostream& operator<<(std::ostream &out,const BasePair &bp)
    {
    out << static_cast<std::string>(bp);
    return out;
    }

  /// Equal operator compares the Nucleotides and the Version of two BasePair
  inline friend bool operator==(const BasePair& bp1, const BasePair& bp2)
    {
    return (bp1.first == bp2.first) and (bp1.second == bp2.second) and (bp1.Version == bp2.Version);
    }

  /// Equal operator compares the Nucleotides of two BasePair but ignoring version
  inline friend bool eq_nv(const BasePair& bp1, const BasePair& bp2)
    {
    return (bp1.first == bp2.first) and (bp1.second == bp2.second);
    }

  /// Equal operator compares the Nucleotides of two BasePair with eq_ns, ignoring sugar and version
  inline friend bool eq_nsnv(const BasePair& bp1, const BasePair& bp2)
    {
    return eq_ns(bp1.first,bp2.first) and eq_ns(bp1.second,bp2.second);
    }

  private:
  /// \brief Appends version_char followed by Version to res.
  ///
  /// Nothing is appended if versionize is no_version or if Version is empty.
  inline void append_version(std::string& res, IncludeVersionType versionize) const
    {
    if (versionize != no_version and not Version.empty())
      {
      res.append(1,version_char);
      res.append(this->Version);
      }
    }

  /// \brief String for one Nucleotide of this BasePair.
  ///
  /// Returns nucleoside_string() of nuc if this BasePair is hybrid, otherwise its nucleobase_char().
  inline std::string formatted_nucleotide_string(const nucleotide_type& nuc) const
    {
    std::string res;
    if (is_hybrid())
      res = nuc.nucleoside_string();
    else
      res = nuc.nucleobase_char();
    return res;
    }

  };//ends class



};//ends namespace
#endif
