// $Id: ParameterMap.h 1251 2021-12-15 18:18:13Z ge $
/// \file ParameterMap.h
/// \brief This is a generic class that handles parameters in form XX:YY
///
/// We need to handle parameters frequently. Here we are able to store and
/// retrieve parameters of type XX:YY ZZ, where XX is a general designation such
/// as AT, YY is the actual parameter name and ZZ its value. We can have more
/// than one general designation, XX1:XX2:XX3:YY, which expands as XX1:YY XX2:YY
/// XX3:YY. We can also use * in place of XX, meaning that any parameter request
/// without a matching general designation XX will be assigned this value.
/// $Revision: 1251 $
/// \author Gerald Weber <gweberbh@gmail.com>
#ifndef GBC_EXP_PARAMETERMAP_H
#define GBC_EXP_PARAMETERMAP_H "$Id: ParameterMap.h 1251 2021-12-15 18:18:13Z ge $"
#include "ErrorCodes.h"
#include <map>
#include <deque>
#include <utility>
#include <valarray>
#include <iostream>
#include <fstream>
#include <sstream>
#include "MathAux.h"
#include<boost/algorithm/string/split.hpp>                                      
#include<boost/algorithm/string.hpp>                                            
#include <boost/regex.hpp>
#include "RegexAux.h"
 
namespace gbc
  {
  template<class _Tp=double>
  /// \brief This class handles parameters used by Potentials and retrieves them from files.
  ///
  /// The parameters are identified by an ID usually constructed from a parameter name
  /// and an object ID. For example the parameter D of the morse potential is named
  /// morse.D and if used for an AT base pair it appears as AT:morse.D
  /// The parameter file may define a default ID "*", which matches any parameter
  /// which has not been identified previously.
  class ParameterMap: public std::map<std::string,_Tp>
    {
    public:
      // Type aliases used throughout the class.
      typedef _Tp                                        parameter_type;     ///< Type of the stored parameter values
      typedef std::string                                identifier_type;    ///< Type of the parameter IDs (the map keys)
      typedef std::pair<identifier_type,parameter_type>  id_par_type;        ///< An (ID, value) pair
      typedef std::pair<parameter_type,bool>             par_bool_type;      ///< A value and a flag telling if it was found, see get_pair_if_exists()
      typedef std::deque<identifier_type>                id_deque_type;      ///< List of IDs
      typedef std::map<identifier_type,parameter_type>   map_type;           ///< Same type as the base class of ParameterMap
      typedef std::map<identifier_type,std::string>      map_fromfile_type;  ///< ID -> text describing where the value came from
      typedef std::map<identifier_type,long int>         map_used_type;      ///< ID -> number of times it was used
      typedef std::deque<identifier_type>                field_deque_type;  ///< List of identifications
      typedef std::deque<parameter_type>                 parameter_deque_type; ///< List of parameter values
      typedef std::map<identifier_type,identifier_type>  equivalence_map_type; ///< Requested ID -> generic ID used in its place
      typedef std::deque<std::string>                    string_deque_type;  ///< List of strings

    static const char Field_separator=':'; ///< This is how we separate each field.
    static const char Default_sub_id='*';  ///< The default sub-id.

    bool Record_variation_differences;              ///< If true, randomise() appends relative differences to Randomization_differences
    bool Debug;                                     ///< Initialised to false; not read by any method of this class
    identifier_type Identify;                       ///< First word of the parameter file read last by read_from()
    parameter_deque_type Randomization_differences; ///< Relative differences (old-new)/old recorded by randomise()
    map_type Temporary_storage;                     ///< Copy of the map taken by randomise() while this storage is empty
    map_fromfile_type FromFileMap;                  ///< Per ID history "value:file", maintained by insert_parameter() and internally_changed()
    map_used_type UsedMap;                          ///< Per ID usage counters, see used_map_inc()
    equivalence_map_type EquivalentID;              ///< Generic IDs chosen by find_parameter() for IDs missing from the map
    string_deque_type MissingParameters; // Holds a list of parameters that were not found
    
    /// \brief Creates an empty map with recording of variation differences and debugging switched off.
    ParameterMap(void): Record_variation_differences(false), Debug(false) {}

    /// \brief Looks up a parameter by its ID and copies the value into \a par.
    ///
    /// The search itself is delegated to find_parameter(), which falls back to
    /// generic entries (ultimately "*:name") if the exact ID is not in the map.
    /// ATTENTION: the returned flag tells if \a par was **modified**, that is if
    /// the value found differs from the one passed in. It does not tell if the
    /// parameter was found.
    inline bool get_parameter(const identifier_type id,  ///< ID or list of IDs
                                parameter_type& par,     ///< The parameter that is modified
                                bool complain=true       ///< Complains if not found
                             )  
      {
      bool value_changed(false); // stays false if nothing is found
      find_parameter(id,par,value_changed,complain);
      return value_changed;
      }
      
    /// \brief Adds one to the usage counter of \a id; a first use starts the counter at 1.
    inline void used_map_inc (const identifier_type id)
      {
      auto counter=UsedMap.find(id);
      if (counter == UsedMap.end()) UsedMap[id]=1;
      else ++(counter->second);
      }
     
    /// \brief Returns the usage counter of \a id, or 0 if it was never counted.
    ///
    /// The lookup does not create an entry in UsedMap.
    inline long int how_many_times_used (const identifier_type id)
      {
      auto counter=UsedMap.find(id);
      if (counter == UsedMap.end()) return 0;
      return counter->second;
      }
      
    /// \brief Sets the parameter identified by its ID.
    ///
    /// If the parameter cannot be found by its ID we search for a default
    /// which starts by "*:"
    /// ATTENTION: true if parameter was found
    inline bool find_parameter(const identifier_type id,  ///< ID or list of IDs
                                parameter_type& par,     ///< The parameter that is modified
                                bool &modified,          ///< This is what differentiates this function from get_parameter 
                                bool complain=true       ///< Complains if not found
                             ) 
      {
      auto it=this->find(id); 
      if (it != this->end()) 
        {
        modified = par != it->second;
        par=it->second;
        used_map_inc(it->first);
        return true;//found
        }
      else 
        {
        //If the parameter was not found by its id, we need to try generic parameters, for this
        //first we separate the id into prefix and suffix, e.g. AT:morse.D becomes sf[0]=morse.D and sf[1]=AT
        //Additional prefixes, such as in AT:TA:morse.D resulting in sf[2]=TA will be silently ignored
        field_deque_type sf=separate(id);
      
        if (sf.size() == (size_t)2) //one should not be searching for multiple prefixes
          {
          std::string prefix = sf[1];
          std::string suffix = sf[0];
          identifier_type target = prefix+Field_separator+suffix;
          identifier_type generic;
          bool found = false;
        
          //First we look if there isn't already an established equivalence
          if (found = (EquivalentID.find(target) != EquivalentID.end()) )
            {
            generic = EquivalentID[target];
            it=this->find(generic);
            found = (it != this->end()); //possibly redundant
            }
                  
          if (not found) // ATTENTION: experimental and currently works only for NN
            {
            // perl -Mre=debug -e '"TT^j_TT^j" =~ /^(.+?)(|\^.+)\_(.+?)(|\^.+)$/; print "res=[$1] [$3]";'
            boost::regex match("^(.+?)(|\\^.+)\\_(.+?)(|\\^.+)$");
            std::string rep=boost::regex_replace(prefix,match,"\\1_\\3");
            generic=rep+Field_separator+suffix;
            if (rep != prefix) it=this->find(generic);
            if (found = (it != this->end()) )
              {
              CERR_WARN(WUNNG) << "using NN generic " << generic << " for " << target << std::endl;
              EquivalentID[target]=generic;
              }
            else generic.clear();
            }
            
          if (not found) // ATTENTION: experimental and currently works only for BP
            {
            // perl -Mre=debug -e '"TT^j" =~ /^([^_]+?)\^(.*?)$/; print "res=[$1] [$2]";'
            boost::regex match("^([^_]+?)\\^(.*?)$");
            std::string rep=boost::regex_replace(prefix,match,"\\1");
            generic=rep+Field_separator+suffix;
            if (rep != prefix) it=this->find(generic);
            if (found = (it != this->end()) )
              {
              CERR_WARN(WUBPG) << "using BP generic " << generic << " for " << target << std::endl;
              EquivalentID[target]=generic;
              }
            else generic.clear();
            }

          if (not found) //now we try a completely generic parameter
            {
            generic="*:"+suffix;
            it=this->find(generic);
            if (found = (it != this->end()) ) 
              {
              CERR_DDEBUG(DPMA_UGF) << "using " << generic << " for " << target << std::endl;
              EquivalentID[target]=generic;
              }
            else generic.clear();
            }

          if (found) 
            {
            modified = par != it->second;
            par=it->second;
            used_map_inc(generic); //we increment the count of the generic match
            return true;
            }
          else 
            {
            MissingParameters.push_back(id);
            if (complain)
              {
              CERR_ERROR(ERRNPF) << "no specific or generic parameter found for \"" << id << "\"" << std::endl;
              CERR << "\nWe currently have these parameters in our map:\n";
              this->print_parameters_and_stats(CERR); 
              CERR_TERM
              }
            return false;
            }
          }
        else
          {
          if (sf.size() > (size_t)2)
            {
            CERR_ERROR(ERRCHMP) << "cannot handle multiple prefixes for " << id << std::endl;
            CERR_TERM
            }
          }
        }
     MissingParameters.push_back(id);
     if (complain)
       {
       CERR_ERROR(ERRNPF) << "no specific or generic parameter found for \"" << id << "\"" << std::endl;
       CERR << std::endl << "We currently have these parameters in our map:" << std::endl;
       this->print_parameters_and_stats(CERR); 
       CERR_TERM
       }
     return false;
     }

    /// \brief Reports all IDs collected in MissingParameters and then invokes CERR_TERM.
    ///
    /// Each ID is reported as error ERRNPF, preceded by \a info. Nothing happens
    /// if no parameter is missing.
    /// NOTE(review): CERR_TERM is defined elsewhere; going by the name of this
    /// method it presumably terminates the program, please confirm.
    void list_missing_parameters_and_exit(std::string info=std::string())
      {
      if (MissingParameters.empty()) return;
      for(auto missing=MissingParameters.begin(); missing != MissingParameters.end(); ++missing)
        {
        CERR_ERROR(ERRNPF) << info << *missing << std::endl;
        }
      CERR_TERM
      }

    /// \brief Gets the parameter identified by its ID.
    ///
    /// If the parameter cannot be found by its ID we search for a generic one,
    /// see find_parameter(), the last resort being the default which starts by "*:"
    /// \return the value found, or a value-initialised parameter_type if neither
    /// a specific nor a generic parameter exists
    inline parameter_type  get_parameter_or_generic(const identifier_type id,  ///< ID or list of IDs
                                bool complain=true       ///< Complains if not found
                             )  
      {
      // Both are initialised as find_parameter() compares par to the value found
      // and leaves par and modified untouched if nothing is found
      parameter_type par=parameter_type();
      bool modified=false;
      find_parameter(id,par,modified,complain);
      return par;
      }
      
   /// \brief Gets a list built from the IDs that match a regular expression.
   ///
   /// For each ID of the map accepted by reg_match(ID,regex) the string returned
   /// by reg_match_string(ID,regex,pos) is appended to the list, so the entries
   /// may be partial matches controlled by the submatch position \a pos.
   /// The usage counters are not changed.
   inline id_deque_type get_regex_id_list(std::string regex, int pos)  ///< ID given as regex
      {
      id_deque_type matches;
      for(auto entry=this->begin(); entry != this->end(); ++entry)
        {
        if (not reg_match(entry->first,regex)) continue;
        matches.push_back(reg_match_string(entry->first,regex,pos));
        }
      return matches;
      }

   /// \brief Gets a map with all (ID, value) entries whose ID matches a regular expression.
   ///
   /// The usage counter of every entry copied to the result is incremented.
   inline map_type get_regex_id_par_map(std::string regex)  ///< ID given as regex
      {
      map_type selection;
      for(auto entry=this->begin(); entry != this->end(); ++entry)
        {
        if (not reg_match(entry->first,regex)) continue;
        selection[entry->first]=entry->second;
        used_map_inc(entry->first);
        }
      return selection;
      }
     
    /// \brief Flags the origin text of \a id in FromFileMap as changed internally.
    ///
    /// The flag is the text "* " placed in front of the origin. An origin which
    /// already starts with '*' is left as it is. If \a id has no entry in
    /// FromFileMap an empty one is created by the lookup and then flagged.
    inline void internally_changed(const identifier_type id)
      {
      std::string &origin=FromFileMap[id];
      if (origin[0] == '*') return; // already flagged
      origin="* " + origin;
      }
     
    /// \brief Sets a new value par in the existing parameter in map
    ///
    /// If \a id itself is in the map its value is replaced. Otherwise \a id is
    /// taken as a list of prefixes followed by the parameter name, as in
    /// AT:TA:par, and the value is set for each expanded ID (AT:par, TA:par).
    /// Expanded IDs which are not yet in the map are inserted with origin "new".
    /// Every existing entry that is set is flagged by internally_changed().
    /// Note that we do not search in EquivalentID map here as the parameter may now assume a different value from its generic 
    /// counterpart, in this case it will be inserted as new parameter
    /// NOTE(review): an \a id without prefix which is not in the map is neither
    /// set nor inserted, yet true is returned; confirm this is intended.
    /// \return true, except if \a id could not be separated into fields
    inline bool set_parameters(const identifier_type id,  ///< ID or list of IDs
                               const parameter_type par) ///< The value of the parameter that we should set
      {
      auto it=this->find(id);
      if (it != this->end()) 
        {
        it->second=par;
        internally_changed(id);//marked as changed even if it->second==par
        return true;
        }

      // For the case where several base pairs, or nearest neighbour pairs are
      // assigned the same parameter value in the form AT:TA:par we first separate
      // the fields AT, TA, par with the function separate.
      // sf[0] is the name of the parameter (par in our example).
      field_deque_type sf=separate(id);
      if (sf.empty()) return false;

      for (size_t n=1; n < sf.size(); ++n)
        {
        const identifier_type new_id(sf[n]+Field_separator+sf[0]);
        it=this->find(new_id);
        if (it != this->end()) 
          {
          it->second=par;
          // It is the expanded ID that lives in the map and in FromFileMap, so this
          // is the one to flag, not the compound id
          internally_changed(new_id);//marked as changed even if it->second==par
          }
        else insert_parameter(new_id,par,"new");
        }
      return true;
      }

    /// \brief Creates an entry for \a id holding a value-initialised parameter.
    ///
    /// An entry which already exists keeps its value. FromFileMap is not touched.
    inline void insert_id(identifier_type id)
      {
      const typename map_type::value_type empty_entry(id,parameter_type());
      this->insert(empty_entry);
      }

    /// \brief Inserts an ID with its parameter, or replaces the value of an existing ID.
    ///
    /// The origin of the value is kept in FromFileMap as "value:file_name".
    /// - New ID: the entry is inserted and any equivalence stored for this ID in
    ///   EquivalentID is removed.
    /// - Existing ID, different value: the value is replaced, a warning WROVNV is
    ///   issued if \a file_name is not empty, and the new origin is put in front
    ///   of the old one, separated by " <> ".
    /// - Existing ID, same value: only the origin is updated, separated by " = ".
    inline void insert_parameter(identifier_type id, const parameter_type& par, std::string file_name=std::string())
      {
      const std::string origin=std::to_string(par) + ":" + file_name;
      auto existing=this->find(id);

      if (existing == this->end())
        {
        this->insert(typename map_type::value_type(id,par));
        this->FromFileMap[id]=origin;
        EquivalentID.erase(id); //If this id was originally obtained from a generic parameter, it needs to be removed from this map
        return;
        }

      std::string &history=this->FromFileMap[id];
      if (existing->second != par)
        {
        if (not file_name.empty())
          {
          CERR_WARN(WROVNV) << "replacing " << id 
            << " old value=" << existing->second
            << " new value=" << par << " (" << file_name << ")" << std::endl;
          }
        history=origin + " <> " + history;
        existing->second=par;
        }
      else
        {
        history=origin + " = " + history;
        }
      }

    /// \brief Randomises the parameters by a certain amount
    ///
    /// While Temporary_storage is empty a copy of the map is saved there,
    /// otherwise restore_original() is called first. Then every value is
    /// multiplied by (1+dice), with dice=gauss_ran(rnd) drawn anew for each entry.
    /// If Record_variation_differences is set, the relative difference
    /// (old-new)/old of each entry whose old value is not the default value is
    /// appended to Randomization_differences.
    /// NOTE(review): gauss_ran is defined elsewhere (presumably MathAux.h), its
    /// distribution is assumed from the name only.
    inline void randomise(parameter_type rnd) ///< Average deviation
      {
      if (Temporary_storage.empty()) Temporary_storage=static_cast<map_type>(*this);
      else restore_original();

      for(auto &entry : *this)
        {
        const parameter_type before=entry.second;
        const double dice=gauss_ran(rnd);
        entry.second *= (1.0+dice);
        if (Record_variation_differences && before != parameter_type())
          {
          Randomization_differences.push_back((before-entry.second)/before);
          }
        }
      }

    inline void restore_original(void)
      {
      static_cast<map_type>(*this)=Temporary_storage;
      }

    /// \brief Prints a list of IDs and parameters
    ///
    /// After the optional \a header and a line with the number of elements, one
    /// line is written per entry in the form: ID value [origin] (times used).
    /// The ID shown is joined with the prefixes of all IDs for which this entry
    /// was used as generic replacement (see EquivalentID). The origin is taken
    /// from FromFileMap and is left empty for entries which have none, as is the
    /// case for IDs created by insert_id().
    void print_parameters_and_stats(std::ostream& out, std::string header=std::string())
      {
      if (not header.empty()) out << header << std::endl;
      out << "#parameter map has " << this->size() << " elements" << std::endl;
      for(auto it=this->begin(); it != this->end(); ++it)
        {
        identifier_type id = it->first;

        for(const auto &eit : EquivalentID) if (eit.second == it->first) id = join_prefixes(id,eit.first); 

        out << id << " " << it->second;

        // Not every entry has an origin, the iterator must not be dereferenced blindly
        const auto origin=FromFileMap.find(it->first);
        out << " [";
        if (origin != FromFileMap.end()) out << origin->second;
        out << "] (" << how_many_times_used(it->first) << ")" << std::endl;
        }
      }

    /// \brief Compares the (ID, value) entries of two parameter maps.
    ///
    /// The maps are equal if they have the same number of entries and all
    /// entries compare equal pairwise, in map order. The bookkeeping members
    /// (FromFileMap, UsedMap, EquivalentID and so on) take no part in the comparison.
    friend bool operator==(const ParameterMap& pm1, const ParameterMap& pm2)
      {
      return (pm1.size() == pm2.size()) && std::equal(pm1.begin(), pm1.end(), pm2.begin());
      }

    /// \brief Splits the field into several components separated by colon.
    ///
    /// The parameter name, which is the last component of the field, comes first
    /// in the result and is followed by the sub-ids in their original order,
    /// e.g. AT:TA:morse.D gives morse.D, AT, TA. Adjacent separators count as one.
    /// A field without any colon is split at the characters '+', '=', ',' and
    /// blank instead, and the order of its components is kept.
    inline field_deque_type separate(identifier_type field) const
      {
      field_deque_type pieces;
      const bool has_separator = (field.find(Field_separator) != std::string::npos);

      if (not has_separator)
        {
        boost::algorithm::split(pieces, field, boost::is_any_of("+=, "),boost::token_compress_on);
        return pieces;
        }

      boost::algorithm::split(pieces, field, boost::is_any_of(":"),boost::token_compress_on);
      //the parameter name is the last item, here we move it to the front
      const identifier_type name=pieces.back();
      pieces.pop_back();
      pieces.push_front(name);
      return pieces;
      }
      
    /// \brief Joins the prefixes of two IDs which share the same parameter name.
    ///
    /// The result is made of all prefixes of \a p1, then all prefixes of \a p2,
    /// then the common parameter name, each separated by ":". E.g. AT:par and
    /// TA:par give AT:TA:par. IDs with different parameter names are reported as
    /// internal error ERRTJDPT.
    inline identifier_type join_prefixes(identifier_type p1, identifier_type p2)
      {
      field_deque_type first=separate(p1);
      field_deque_type second=separate(p2);
      if (first[0] != second[0])
        {
        CERR_IERROR(ERRTJDPT) << "trying to join different parameter types " <<  first[0] << " and " << second[0] << std::endl;
        CERR_TERM
        }
      const std::string suffix = first[0];

      //what remains after removing the parameter name are the prefixes only
      first.pop_front();
      second.pop_front();

      identifier_type joined;
      for (const auto &prefix : first)
        {
        joined += prefix;
        joined += ":";
        }
      for (const auto &prefix : second)
        {
        joined += prefix;
        joined += ":";
        }

      return joined + suffix;
      }
      
    /// \brief Reads the parameters from each file of the list, in the order given.
    ///
    /// Each file name is passed on to read_from().
    inline void read_from_list_of_files(string_deque_type &par_file_deque)
      {
      for(const auto &file_name : par_file_deque)
        {
        read_from(file_name);
        }
      }

    /// \brief Reads parameters from the file \a fname and inserts them in the map.
    ///
    /// The first word of the file is stored in Identify and the remainder of the
    /// first line is discarded. Of the following lines, those with less than two
    /// characters, those starting with '+' (rules) or '#' (comments) and those
    /// made of whitespace only are skipped. The remaining lines are read as
    /// "field value": a field without prefix is inserted as it is, a field such
    /// as AT:TA:par is expanded to AT:par and TA:par, each getting the same value.
    /// A value which is missing or cannot be read results in a value-initialised
    /// parameter. A file which cannot be read is reported as error ERRCNRF.
    inline void read_from(std::string fname)
      {
      COUT_INFO(INFORPF) << "Reading parameters from " << fname << std::endl;
      std::ifstream parfile(fname.c_str());
      if (!parfile.good()) 
        {
        CERR_ERROR(ERRCNRF) << " Could not read " << fname << std::endl;
        CERR_TERM
        }
      parfile >> Identify; //Reads the first word which is used as identifier
      std::string comment;
      std::getline(parfile,comment); //Reads the remaining content of the first line which is then ignored

      // The loop is controlled by the result of getline and not by eof(), so it
      // also ends when the stream is in a failed state without having reached the end
      std::string line;
      while (std::getline(parfile,line))
        {
        //lines starting with + are for rules and # should be ignored
        if (line.length() <= 1 || line[0] == '+' || line[0] == '#') continue;

        std::stringstream sline(line);
        std::string field;
        if (!(sline >> field)) continue; //nothing but whitespace in this line

        parameter_type par=parameter_type(); //well defined even if no value can be read
        sline >> par;

        field_deque_type sf=separate(field);
        if (sf.size() == 1) 
          {
          insert_parameter(sf[0],par,fname); //for parameters without prefixes such as terminal_AT_enthalpy
          }
        else
          {
          for (size_t n=1; n < sf.size(); ++n)
            insert_parameter(sf[n]+Field_separator+sf[0],par,fname);
          }
        } 
      }
      
    /// \brief Returns the value stored for exactly this \a id, without trying generic IDs.
    ///
    /// If the ID is found its usage counter is incremented. Otherwise a
    /// value-initialised parameter_type is returned, which cannot be told apart
    /// from a stored value of the same content.
    inline parameter_type get_if_exists(std::string id)
      {
      auto entry=this->find(id);
      if (entry == this->end()) return parameter_type();
      used_map_inc(entry->first);
      return entry->second;
      }
 
    /// \brief Returns the value stored for exactly this \a id together with a found flag.
    ///
    /// \return a pair whose second member tells if the ID is in the map, and
    /// whose first member is the stored value, or a value-initialised
    /// parameter_type if it is not. The usage counter is incremented if found.
    inline par_bool_type get_pair_if_exists(std::string id)
      {
      auto entry=this->find(id);
      if (entry == this->end()) return par_bool_type(parameter_type(),false);
      used_map_inc(entry->first);
      return par_bool_type(entry->second,true);
      }
      
    /// \brief Generates a set or multiset of all id in use
    ///
    /// Specify the _Container either as set or multiset. A set
    /// will give you unique ids, multiset will give you repeated ids.
    template<class _Container>
    inline void get_all_id(_Container& collection) const
      {
      field_deque_type first_field;
      typename map_type::const_iterator it;
      for(it=this->begin(); it != this->end(); ++it)
        {
        first_field=separate(it->first);
        for(size_t n=1; n < first_field.size(); ++n)
          collection.insert(first_field[n]);
        }
      }

    };
};
#endif
