// $Id: Duplex.h 1368 2024-08-31 14:16:49Z ge $
/// \file Duplex.h
/// \brief contains the Duplex class
///
/// $Revision: 1368 $
/// \author Gerald Weber <gweberbh@gmail.com>

#ifndef GBC_EXP_DUPLEX_H
#define GBC_EXP_DUPLEX_H "$Id: Duplex.h 1368 2024-08-31 14:16:49Z ge $"

#include "BasePairNeighbours.h"
#include "NucleotideSequence.h"
#include<boost/algorithm/string/split.hpp>                                      
#include<boost/algorithm/string.hpp>                                            
#include <algorithm>
#include "RegexPattern.h"

namespace gbc {

/// \brief Contains a sequence (std::deque) of base pairs.
///
/// This class differs from NeighbourSequence in several ways.
/// Duplex contains a std::deque of objects of type BasePair, while
/// NeighbourSequence contains a sequence of BasePairNeighbours.
///
/// \tparam _InternalTp internal representation type forwarded to BasePair
///         and NucleotideSequence.
template<class _InternalTp=char>
class Duplex: public std::deque<BasePair<_InternalTp> >
  {
  public:
    typedef _InternalTp                internal_type;  ///< type of _InternalTp
    typedef BasePair<_InternalTp>      base_pair_type; ///< type of BasePair<_InternalTp>
    typedef base_pair_type             pair_type;      ///< type of BasePair<_InternalTp>
    typedef std::deque<base_pair_type> deque_type;     ///< underlying container type
    typedef unsigned long int          index_type;     ///< index type used by operator()
    enum strand_alignment_type {antiparallel = 5335, parallel    = 5353};

    typename base_pair_type::SymmetryActionsType Symmetry_action;    ///< Flags which symmetry action will be performed
    NucleotideSequence<internal_type> Main_strand;          ///< Not in use yet
    NucleotideSequence<internal_type> Complementary_strand; ///< Not in use yet
    strand_alignment_type Strand_alignment;                 ///< defines how strands are aligned
    bool MainStrandUniformSugar,CmplStrandUniformSugar;     ///< true if all nucleotides in the strand have the same sugar type
    Nucleotide<>::sugar_type MainStrandDominantSugar,CmplStrandDominantSugar; ///< most frequent sugar type of each strand

  /// \brief Builds an empty antiparallel duplex with no symmetry simplification.
  Duplex(void): Symmetry_action(base_pair_type::do_not_simplify_symmetry), Strand_alignment(antiparallel),
                MainStrandUniformSugar(true), CmplStrandUniformSugar(true), MainStrandDominantSugar(), CmplStrandDominantSugar() {}

  /// \brief Builds a duplex by parsing a string, see set_duplex().
  Duplex(const std::string& dup,
         const typename base_pair_type::SymmetryActionsType symmetry_action=base_pair_type::do_not_simplify_symmetry)
        :Symmetry_action(symmetry_action), Strand_alignment(antiparallel),
         MainStrandUniformSugar(true), CmplStrandUniformSugar(true), MainStrandDominantSugar(), CmplStrandDominantSugar()
    {
    this->set_duplex(dup);
    }

  /// \brief Builds a duplex by pairing two nucleotide sequences element by element.
  ///
  /// Pairing stops at the end of the shorter sequence. The strand alignment
  /// is parallel when both sequences report the same StrandDirection and
  /// antiparallel otherwise.
  Duplex(const NucleotideSequence<internal_type>& nuc1, ///< Main strand
         const NucleotideSequence<internal_type>& nuc2, ///< Complementary strand
         const typename base_pair_type::SymmetryActionsType symmetry_action=base_pair_type::do_not_simplify_symmetry)
        :Symmetry_action(symmetry_action), Strand_alignment(antiparallel),
         // The sugar members are initialised exactly as in the other constructors;
         // leaving them indeterminate would propagate garbage through copies.
         MainStrandUniformSugar(true), CmplStrandUniformSugar(true), MainStrandDominantSugar(), CmplStrandDominantSugar()
    {
    typename NucleotideSequence<internal_type>::const_iterator n1=nuc1.begin(), n2=nuc2.begin();
    while ((n1 != nuc1.end()) && (n2 != nuc2.end()))
      {
      BasePair<internal_type> bp(*n1,*n2,Symmetry_action);
      this->push_back(bp);
      ++n1;
      ++n2;
      }
    Strand_alignment = (nuc1.StrandDirection == nuc2.StrandDirection) ? parallel : antiparallel;
    if (Symmetry_action == base_pair_type::simplify_symmetry) this->reduce_to_smallest_bp_symmetry();
    }

/*  Duplex(const nucleotide_sequence_type& fst,const nucleotide_sequence_type& scnd)
    {
    this->first=fst;
    this->second=scnd;
    }
*/

  /// \brief Replaces the content of the duplex by parsing a string.
  ///
  /// Three notations are tried in order:
  /// - text matching DUPLEX_SET_DUPLEX_PATTERN, e.g. GTTCAC/CAAGTG or
  ///   d(TGCATGCA)/r(ACGTACGT): both strands are paired;
  /// - underscore separated base pairs, e.g. "AC_GT^a_AC_GT": each token
  ///   is handed to the BasePair string constructor;
  /// - anything else is read as a single nucleotide sequence.
  ///
  /// \attention The previous content of the Duplex is erased
  /// \return reference to this duplex
  inline Duplex& set_duplex(const std::string& dup)
    {
    // Compiled once per instantiation instead of on every call.
    // NOTE(review): assumes DUPLEX_SET_DUPLEX_PATTERN is a fixed pattern -- confirm in RegexPattern.h
    static const boost::regex slash_separated(DUPLEX_SET_DUPLEX_PATTERN); //here we try to find occurrences like GTTCAC/CAAGTG and d(TGCATGCA)/r(ACGTACGT)
    boost::smatch found;
    if (boost::regex_search(dup,found,slash_separated))
      {
      std::string mn=found[1], cp=found[2];

      NucleotideSequence<internal_type> nuc1,nuc2;
      nuc1.assign(mn);
      nuc2.assign(cp);
      // The constructor already derives Strand_alignment from the two strands.
      *this=Duplex(nuc1,nuc2,Symmetry_action);
      return *this;
      }

    //Here we try to find occurrences of type "AC_GT^a_AC_GT"
    if (dup.find("_") != std::string::npos)
      {
      std::deque<std::string> extracted;
      boost::algorithm::split(extracted, dup, boost::is_any_of("_"),boost::token_compress_on);
      // Start from an empty duplex so that this branch replaces the content
      // like the other two branches do, instead of appending to it.
      deque_type::clear();
      for (std::string& token : extracted) this->push_back(base_pair_type(token));
      return *this;
      }

    NucleotideSequence<internal_type> nuc(dup.length());
    nuc << dup;
    *this=nuc;
    return *this;
    }

  /// \brief Appends a base pair, reducing its symmetry first if Symmetry_action requires it.
  template<class _InternalTp2>
  inline Duplex& operator+=(BasePair<_InternalTp2> bp)
    {
    if (Symmetry_action == base_pair_type::simplify_symmetry) bp.reduce_to_smallest_symmetry();
    deque_type::push_back(bp);
    return *this;
    }

  /// \brief Reverses the order of the base pairs in place.
  inline void reverse(void)
    {
    std::reverse(this->begin(),this->end());
    }

  /// Swap main strand and complementary strands, eg "CAAGTG/GTTCAC" becomes "GTTCAC/CAAGTG"
  inline void swap_strands(void)
    {
    for (auto& bp : *this) bp.swap();
    }

  /// \brief Reduce to lexical symmetry
  ///
  /// The duplex is replaced by its reversed, strand-swapped form when the
  /// formatted string of that form compares smaller than the current one.
  /// \return true if the duplex was modified
  inline bool reduce_to_smallest_duplex_symmetry(void)
    {
    Duplex<internal_type> flipped=*this;
    flipped.reverse();
    flipped.swap_strands();
    if (!(flipped.formatted_string() < this->formatted_string())) return false;
    this->reverse();
    this->swap_strands();
    return true;
    }


  /// \brief For each base pair reduce to equivalents, eg. TA becomes AT
  /// \return true if at least one base pair reported a modification
  inline bool reduce_to_smallest_bp_symmetry(void)
    {
    bool modified=false;
    for (auto& bp : *this)
      {
      // Every base pair must be visited, so no short-circuit here.
      if (bp.reduce_to_smallest_symmetry()) modified=true;
      }
    return modified;
    }

  /// \brief Converts a nucleotide to a base pair and appends it.
  template<class _InternalTp2>
  inline Duplex& operator+=(const Nucleotide<_InternalTp2> nuc)
    {
    BasePair<_InternalTp2> bp(nuc,Symmetry_action);
    if (Symmetry_action == base_pair_type::simplify_symmetry) bp.reduce_to_smallest_symmetry();
    deque_type::push_back(bp);
    return *this;
    }

  /// \brief Copies a NucleotideSequence to the Duplex
  ///
  /// Each Nucleotide is converted to a BasePair and added to the
  /// end of the Duplex.
  ///
  /// \attention The previous content of the Duplex is erased
  template<class _InternalTp2>
  inline Duplex& operator=(const NucleotideSequence<_InternalTp2>& nucseq)
    {
    deque_type::clear();
    typename NucleotideSequence<_InternalTp2>::const_iterator nt;
    for(nt=nucseq.begin(); nt != nucseq.end(); ++nt)
      this->operator+=(*nt);
    return *this;
    }

  /// \brief Builds a BasePairNeighbours from the base pairs at positions bg and ed.
  /// \attention Positions are not range checked.
  inline BasePairNeighbours<_InternalTp> operator()(const index_type bg, const index_type ed) const
    {
    BasePairNeighbours<_InternalTp> nb((*this)[bg],(*this)[ed]);
    return nb;
    }

  /// \brief Collects the first nucleotide of every base pair.
  // NOTE(review): assumes NucleotideSequence(n) does not pre-populate n elements
  // before the push_back calls below -- confirm against NucleotideSequence.h
  inline NucleotideSequence<_InternalTp> main_strand(void) const
    {
    NucleotideSequence<_InternalTp> strand(this->size());
    for (const auto& bp : *this) strand.push_back(bp.first);
    return strand;
    }

  /// \brief Collects the second nucleotide of every base pair.
  // NOTE(review): same assumption on NucleotideSequence(n) as in main_strand()
  inline NucleotideSequence<_InternalTp> complementary_strand(void) const
    {
    NucleotideSequence<_InternalTp> strand(this->size());
    for (const auto& bp : *this) strand.push_back(bp.second);
    return strand;
    }

   /// \brief True when the main strand equals the reversed complementary strand.
   inline bool self_complementary(void) const
     {
     NucleotideSequence<_InternalTp> s(main_strand()),c(complementary_strand());
     c.reverse();
     return s == c;
     }

   /// \brief Counts the sugar types found along one strand.
   /// \param i 0 selects the main strand, any other value the complementary strand
   /// \return pair of (true if exactly one sugar type occurs, most frequent sugar type);
   ///         for an empty duplex the pair is (false, value-initialised sugar type)
   inline std::pair<bool,Nucleotide<>::sugar_type> sugar_analysis(int i) const
     {
     std::map<Nucleotide<>::sugar_type,int> sugar;
     for (const auto& bp : *this)
       {
       if (i == 0) ++sugar[bp.first.sugar()];
       else        ++sugar[bp.second.sugar()];
       }

     const bool uniform = sugar.size() == 1;

     // Value-initialised so that an empty duplex never yields an indeterminate value.
     Nucleotide<>::sugar_type dominant = Nucleotide<>::sugar_type();

     int max=0;
     for (const auto& ms : sugar)
       {
       // Strict comparison: on ties the first sugar in map order wins.
       if (ms.second > max)
         {
         max = ms.second;
         dominant = ms.first;
         }
       }
     return std::pair<bool,Nucleotide<>::sugar_type>(uniform,dominant);
     }


   //We consider a duplex a hybrid when each strand has one single sugar type
   //and the sugar of the main strand differs from that of the complementary strand
   //this is hybrid t(AATACAGA)/d(TTATGTCT)
   //but this is not AA<lT>ACAGA/TTATGTCT
   inline bool is_hybrid(void) const
     {
     const std::pair<bool,Nucleotide<>::sugar_type> main=sugar_analysis(0), cmpl=sugar_analysis(1);

     // Both flags are false for an empty duplex, so begin() is only
     // dereferenced when at least one base pair exists.
     return main.first && cmpl.first && (this->begin()->first.sugar() != this->begin()->second.sugar());
     }

  /// \brief Returns a string of the form main/complementary representing the duplex.
  ///
  /// Hybrid duplexes (see is_hybrid()) are written with a sugar prefix per
  /// strand, e.g. t(AATACAGA)/d(TTATGTCT). Otherwise nucleotides whose sugar
  /// differs from the dominant sugar of their strand are wrapped in <...>.
  inline std::string formatted_string(void) const
    {
    std::string first,second;
    if (is_hybrid())
      {
      // is_hybrid() implies the duplex is not empty, begin() is valid here.
      typename deque_type::const_iterator head=this->begin();
      first  = std::string(1,head->first.sugar_char())+"(";
      second = std::string(1,head->second.sugar_char())+"(";
      for (const auto& bp : *this)
        {
        first += bp.first.symbol();
        second+= bp.second.symbol();
        }
      first  += ")";
      second += ")";
      }
    else
      {
      const std::pair<bool,Nucleotide<>::sugar_type> main=sugar_analysis(0), cmpl=sugar_analysis(1);
      for (const auto& bp : *this)
        {
        if (bp.first.sugar() == main.second)
          first += bp.first.symbol();
        else
          first += "<" + bp.formatted_first_string() + ">";
        if (bp.second.sugar() == cmpl.second)
          second += bp.second.symbol();
        else
          second += "<" + bp.formatted_second_string() + ">";
        }
      }
    return first + "/" + second;
    }

  /// \brief Conversion to std::string, same result as formatted_string().
  inline operator std::string(void) const
    {
    return this->formatted_string();
    }

  /// \brief Returns the string form of every base pair, each followed by one space.
  inline std::string basepair_list_formatted(void) const
    {
    std::string out;
    for (const auto& bp : *this)
      {
      out += static_cast<std::string>(bp);
      out += std::string(" ");
      }
    return out;
    }


  /// Extractor for printing BasePair symbols like AT/CG/GC
  inline friend std::ostream& operator<<(std::ostream &out,const Duplex &dp)
    {
    std::copy(dp.begin(),dp.end(),std::ostream_iterator<base_pair_type>(out,"/"));
    return out;
    }

  };//ends class


};//ends namespace
#endif
