// $Id: FastaFormat.h 1101 2020-04-21 18:21:41Z ge $
/// \file FastaFormat.h
/// \brief Contains the class FastaFormat, which parses files in Fasta format
///
/// $Revision: 1101 $
/// \author Gerald Weber <gweberbh@gmail.com>
#ifndef FASTAFORMAT_H
#define FASTAFORMAT_H "$Id: FastaFormat.h 1101 2020-04-21 18:21:41Z ge $"
#include "FileFormat.h"

namespace gbc {

/// \brief Parses a file in Fasta format.
///
/// This class can also map the contigs in a file. A contig starts at a header
/// line whose first character is `>'.
class FastaFormat: public FileFormat
  {
  public:
  /// If true, fasta_read() assigns '#' to the nucleotide and returns as soon as
  /// a contig header line has been consumed, instead of continuing to the next
  /// sequence character.
  bool Return_at_contig_begin;

  /// The void constructor. Flags this format as contig capable.
  FastaFormat(void): Return_at_contig_begin(false)
    {
    FileFormat::contig_capable=true;
    }

  /// Constructor which takes the filename as argument. Flags this format as
  /// contig capable.
  FastaFormat(const std::string &fn): FileFormat(fn), Return_at_contig_begin(false)
    {
    FileFormat::contig_capable=true;
    }

  /// Verifies if the file starts with a `>' as Fasta files do.
  virtual bool check_file_type(void);

  /// Prepares to read by simply going to the beginning of the file.
  virtual void prepare_reading(void);

  /// Maps all contigs in a file and places them into FileParser::Contig_table.
  virtual void map_contigs(const std::string key=std::string()); ///< key not used.

  /// \brief Specialized function for reading files in fasta format.
  ///
  /// Simply forwards to fasta_read().
  virtual inline void read(Nucleotide<> &nt) ///< Nucleotide to be assigned to
    {fasta_read(nt);}

  /// \brief Specialized function for reading files in fasta format.
  ///
  /// Reads characters until one of the following happens:
  /// - a character found in nt.dictionary.index_map is read: it is assigned to
  ///   nt, the sequence position is advanced and stored in nt;
  /// - a character which is neither `>' nor in the dictionary is read:
  ///   nt.Good is set to false;
  /// - a contig header (`>') is read while Return_at_contig_begin is true:
  ///   '#' is assigned to nt;
  /// - the end of file is reached or the extraction fails: nt.Good is set to
  ///   false.
  ///
  /// While Contig_mapping is true every header line closes the current entry
  /// of Contig_table (if any) and opens a new one. Contig_begin is true on
  /// return only if a header line was consumed during this call.
  ///
  /// NOTE(review): skipping directly to mapped contig sections via
  /// Contig_table is currently disabled (see the commented section below), so
  /// the file is always read sequentially.
  template<class Tp>
  void fasta_read(Nucleotide<Tp> &nt) ///< Nucleotide to be assigned to
    {
    Contig_begin=false;
    bool isgood=false;
    while (good() && !eof() && !isgood)
      {
      // The following section could be important if the fasta file is not read continuously
      // but it is disabled since it has been the source of very slow processing.
      //if (!Contig_mapping)
        //{
        //if (!Contig_table.empty())
          //{
          //file_handle.seekg(Contig_table.next_streampos(file_handle.tellg(),forward_reading));
          //Last_sequence_position.contig_description(Contig_table.current()->Description.front());
          //verify_contig_begin();
          //}
        //}

      // Position before the extraction, recorded as the position of the nucleotide.
      std::streampos current_stream_position=tellg();
      char cr='\0';
      file_handle >> cr;
      // A failed extraction leaves cr untouched, so stop on any failure and
      // not only at end of file.
      if (eof() || file_handle.fail()) {nt.Good=false; break;}
      switch(cr)
        {
        case '>':
          // Header line: the remainder of the line is the contig description.
          get_next_line();
          Last_sequence_position.contig_description(the_line.str());
          if (Contig_mapping)
            {
            // Close the contig we were in, if any, before opening the new one.
            if (Contig_table.Current != Contig_table.end())
              {
              Contig_table.current()->Stream_section.end(Last_sequence_position.Stream_position);
              Contig_table.current()->Contig_section.end(Last_sequence_position.Nucleotide_position);
              }
            Contig_table.new_contig("fasta",Last_sequence_position.contig_description());
            Contig_table.current()->Stream_section.begin(tellg());
            Contig_table.current()->Contig_section.begin(Last_sequence_position.Nucleotide_position+1);
            }
          Contig_begin=true;
          if (Verbose) {std::cout << "New contig at " << tellg() << std::endl;}
          if (Return_at_contig_begin)
            {nt='#'; return;}
          break; // leaves the switch only, the loop goes on to the next character
        default:
          if (nt.dictionary.index_map.find(cr) != nt.dictionary.index_map.end())
            {
            nt=cr;
            Last_sequence_position.stream_position(current_stream_position);
            Last_sequence_position.Nucleotide_position++;
            nt.Sequence_position=Last_sequence_position;
            if (get_complementary) nt.complementary();
            isgood=true;
            }
          else
            {
            // Unknown symbol: hand back a nucleotide flagged as not good.
            nt.Good=false;
            isgood=true;
            }
        }
      }
    }

  /// \brief Gets complete contigs for files in fasta format.
  ///
  /// The remainder of the first header line (`>') found becomes \a comment.
  /// Every other character extracted is appended to \a sequence, including
  /// characters which appear before that first header. Reading stops at the
  /// end of file, on a failed extraction, or at a second header, whose `>' is
  /// put back into the stream so that the next call starts at that contig.
  ///
  /// \param sequence overwritten with the characters read
  /// \param comment overwritten with the header line, empty if none was found
  /// \return The length of the sequence we read
  virtual size_t read_contig(std::string &sequence, std::string &comment)
    {
    size_t sequence_length=0;
    sequence.clear();
    comment.clear();
    bool header_seen=false;
    while(!eof())
      {
      char cr='\0';
      file_handle >> cr;
      // A failed extraction leaves cr untouched; without this check a stream
      // error which does not set eof would keep the loop running forever.
      if (eof() || file_handle.fail()) return sequence_length;
      switch(cr)
        {
        case '>':
          if (header_seen)
            {
            // Start of the next contig: leave it in the stream for the next call.
            file_handle.putback(cr);
            return sequence_length;
            }
          header_seen=true;
          get_next_line();
          comment=the_line.str();
          break;
        default:
          sequence += cr;
          sequence_length++;
        }
      }
    return sequence_length;
    }

  };
};

#endif






