// $Id: ContigTable.h 628 2011-08-31 15:42:16Z ge $
/// \file ContigTable.h
/// \brief Defines ContigTable class
///
/// $Revision: 628 $
/// \author Gerald Weber <gweberbh@gmail.com>
#ifndef CONTIGTABLE_H
#define CONTIGTABLE_H "$Id: ContigTable.h 628 2011-08-31 15:42:16Z ge $"
#include "ContigDescription.h"
#include <map>
#include <list>

namespace gbc
  {
  
  /// \brief ContigTable handles the information about known contigs.
  ///
  /// The information is stored into a std::list<ContigDescription> by the
  /// use of the function map_contig() [defined by each class derived from FileFormat,
  /// such as FastaFormat and GenbankFormat].
  /// After reading in the contig information, it is possible to mark the contigs
  /// as to be ignored or not and it is possible to join contigs that overlap.
  /// The file parsers take the contig marks into account when reading a file, i.e.
  /// they will not read in contigs that are marked to be ignored.
  ///
  /// \note The iterator Current may be equal to end() (empty table, or after the
  /// last contig has been read). Members that dereference current() directly,
  /// such as stream_section() and contig_section(), require Current to point
  /// to a valid ContigDescription.
  class ContigTable: public std::list<ContigDescription>
    {
    private:
      static bool cvs_id;   ///< This is only to force cvs info initialization in module.
    public:
      typedef std::map<std::string,int> keyword_map; ///< The type for keyword statistics
      ContigTable::iterator Current;       ///< Points to the current ContigDescription.
      bool Ignore_outside;                 ///< Should we deal with outside contigs.
      keyword_map Keyword_statistics;      ///< Keywords that were found during parsing of the file.
      bool Contig_begin;                   ///< Flag which signals if we are at the beginning of a contig
                                           ///< this is used by FileParser to determine the contig position.
      bool Contig_inside;                  ///< Flag which signals if we are inside a contig

      /// \brief The void constructor. The flag Ignore_outside is set to false.
      ContigTable(void);

      /// \brief Returns the ContigTable::iterator to current.
      /// \attention The returned iterator may be end(); check before dereferencing.
      inline ContigTable::iterator current(void) {return Current;}

      /// \brief Returns the ContigTable::iterator to previous description.
      ///
      /// Generally, when a contig has just been read Current points already to the
      /// next contig, therefore we need to know the previous one.
      /// \return Iterator to the element preceding Current, or end() if Current
      /// is already at begin() and therefore has no predecessor.
      /// \attention Check for the return value.
      inline ContigTable::iterator just_read(void)
        {
        // Decrementing begin() is undefined behaviour, so report "nothing read yet".
        if (Current==begin()) return end();
        ContigTable::iterator Prev=Current;
        --Prev;
        return Prev;
        }

      /// When we call new_contig this function will collect the keyword, if new, into Keyword_statistics
      void collect_keyword(const std::string &kw);


      /// Initiates a new contig from a ContigDescription object. Current iterator will be set to
      /// this object.
      void new_contig(ContigDescription &cd); ///< The ContigDescriptor object which will be copied into the ContigTable

      /// \brief Initiates a new contig which has st as its description.
      /// A new object of type ContigDescription will be created and Current iterator will be set to
      /// this object.
      void new_contig(const std::string &tp,  ///< The ContigType
                      const std::string &st); ///< The string which describes this contig, should be unique for a given ContigType.

      /// \brief Finds the ContigDescription of type tp which has st as description string.
      ///
      /// Current is tested first as a shortcut (exact match of type and description);
      /// otherwise the table is scanned from begin(). An empty st matches the first
      /// contig of type tp found by the scan.
      /// \return ContigTable::iterator to the find or end().
      /// \attention Check for the return value.
      inline ContigTable::iterator find(const std::string &tp,  ///< The ContigType
                                        const std::string &st=std::string())  ///< the description of the contig, needs to be an exact match
        {
        // Current may be end() (empty table or past the last contig), in which
        // case it must not be dereferenced and we go straight to the scan.
        if (Current!=end() && Current->Contig_type==tp && Current->description()==st)
          return Current;
        ContigTable::iterator ct;
        for(ct=begin(); ct!=end(); ++ct)
          {
          if (ct->Contig_type==tp && (ct->description()==st || st.empty())) break;
          }
        return ct;
        }

      /// \brief Returns true if there is a ContigDescription with this st description.
      /// If found set ContigTable::Current to be the position where this contig was
      /// found.
      /// \return true if found.
      bool find_contig(const std::string &tp,  ///< The ContigType
                       const std::string &st=std::string()); ///< the description of the contig, needs to be an exact match or empty to find the first occurrence of that type.

      /// \brief Sets the description of the current ContigDescription object.
      void description(const std::string st); ///< The string which describes this contig, should be unique for a given ContigType.

      /// \brief Returns the description of the current ContigDescription object.
      std::string description(void);

      /// \brief Sets the Stream_section of the current ContigDescription object.
      /// \attention Current must point to a valid ContigDescription (not end()).
      inline
      void stream_section(const std::streampos &p1,   ///< streampos where the contig starts
                          const std::streampos &p2)   ///< streampos where the contig ends
        {current()->Stream_section.section(p1,p2);}

      /// \brief Sets the Contig_section of the current ContigDescription object.
      /// \attention Current must point to a valid ContigDescription (not end()).
      inline
      void contig_section(const nucleotide_position_type &p1,  ///< Nucleotide position where the contig starts
                          const nucleotide_position_type &p2)  ///< Nucleotide position where the contig ends
        {current()->Contig_section.section(p1,p2);}

      /// \brief Checks if the current streampos is in the current Stream_section.
      /// The Stream_section is the one that was mapped previously and is now
      /// where Current points to.
      /// \return Returns true if ps is in the current Stream_section.
      bool in_current_contig_section(const std::streampos &ps);    ///< the streampos to check

      /// \brief Checks if the current nucleotide position is in the current Contig_section.
      /// The Contig_section is the one that was mapped previously and is now
      /// where Current points to.
      /// \return Returns true if ps is in the current Contig_section.
      bool in_current_contig_section(const nucleotide_position_type &ps);    ///< the Nucleotide position to check

      /// \brief Checks if we are between one contig and the next one.
      /// \return True if we are.
      bool between_nearest_contigs(const ContigTable::iterator &ct,     ///< Iterator to ContigTable
                                   const nucleotide_position_type &ps); ///< Nucleotide position to check.

      /// \brief Checks if given nucleotide_position is inside the Current contig.
      /// \return true if ns is inside some contig section
      bool inside_contig(const nucleotide_position_type &ns); ///< the Nucleotide position to check

      /// \brief Marks if a contig should be considered (true) or ignored (false).
      /// \return ContigTable::iterator to the last contig that was marked.
      ContigTable::iterator  mark(const bool consider=true,            ///< True if we should consider this contig, false if we should ignore it.
                                  const std::string &tp=std::string(), ///< The type of contig, e.g, CDS.
                                  const std::string &st=std::string()); ///< The exact description of the contig, e.g, \gene="E3".

      /// \brief Check if the current stream position should be ignored.
      /// Note that usually you would prefer checking the actual Nucleotide Position.
      /// \attention Currently only FastaFormat knows about stream positions.
      /// \return True if we should ignore it.
      bool ignore(const std::streampos &ps); ///< the streampos to check

      /// \brief Check if the current nucleotide position should be ignored.
      /// \attention Currently only FastaFormat knows about stream positions.
      /// \return True if we should ignore it.
      bool ignore(const nucleotide_position_type &ps); ///< the Nucleotide position to check

      /// \brief Ignore all mapped contigs.
      /// \deprecated Obsolete, use mark(false);
      inline void ignore_all_contigs(void) {mark(false);}

      /// \brief Selects a specific type contig to ignore.
      /// \deprecated Obsolete, use mark(false,tp)
      inline
      void ignore_contig_type(const std::string &tp)   ///< The ContigType
        {mark(false,tp);}

      /// \brief Selects a specific contig to ignore.
      /// \deprecated Obsolete, use mark(false,tp,st)
      inline
      void ignore_contig(const std::string &tp,  ///< The ContigType
                         const std::string &st)  ///< the exact contig description
        {mark(false,tp,st);}

      /// \brief Selects a specific type of contig.
      /// \deprecated Obsolete, use mark(true,tp)
      inline
      void select_contig_type(const std::string &tp)  ///< The ContigType, if empty selects all
        {mark(true,tp);}

      /// \brief Selects a specific type of contig, set any other type to be ignored.
      /// Implemented as mark(false) on everything followed by mark(true,tp).
      inline
      void select_only_contig_type(const std::string &tp)  ///< The ContigType
        {mark(false); mark(true,tp);}

      /// \brief Removes all entries to be ignored from the table, useful to save memory.
      void delete_ignored_contigs(void);

      /// \brief Selects a specific contig to read.
      /// \deprecated Obsolete, use mark(true,tp,st)
      inline
      void select_contig(const std::string &tp, ///< The ContigType
                         const std::string &st) ///< the exact contig description
        {mark(true,tp,st);}

      /// \brief Shows the number of overlapping contigs.
      /// \attention This is only meaningful after running ContigTable::merge_overlaps.
      /// \return The number of overlaps.
      int count_overlaps(void);

      /// \brief Merges the overlapping contigs.
      /// This will merge overlapping contigs of the same type and which have not
      /// been marked to be ignored.
      /// The contigs will be joined together into a single description separated by "|".
      /// \attention Old contigs, that have been overlapped will be deleted.
      /// \return The amount of overlapping contigs.
      int merge_overlaps(void);

      /// \brief Jumps to the next stream positions which should not be ignored.
      /// \attention Currently works only with FastaFormat.
      /// \note The forward flag presumably selects the search direction; confirm
      /// against the implementation.
      std::streampos next_streampos(const std::streampos &ps, bool forward=true); ///< the streampos to jump to

      /// \brief Jumps to the next Nucleotide position which should not be ignored.
      nucleotide_position_type next_contigpos(const nucleotide_position_type &ps); ///< the Nucleotide position to jump to

      /// \brief Inserter which prints the whole ContigTable.
      friend std::ostream& operator<<(std::ostream &out,       ///< The output stream
                                      const ContigTable &ct);  ///< The ContigTable to be printed

      /// \brief Inserter which prints the whole ContigTable keywords.
      /// \attention keywords are not currently in use.
      friend std::ostream& operator<<(std::ostream &out,       ///< The output stream
                                      const keyword_map &km);  ///< The Keyword_statistics to be printed
      };
};

#endif
