// $Id: FileFormat.h 967 2017-07-13 12:04:25Z ge $
/// \file FileFormat.h
/// \brief Contains the base class FileFormat used by FastaFormat and GenbankFormat
///
/// $Revision: 967 $
/// \author Gerald Weber <gweberbh@gmail.com>
#ifndef FILEFORMAT_H
#define FILEFORMAT_H "$Id: FileFormat.h 967 2017-07-13 12:04:25Z ge $"
#include <iostream>
#include <string>
#include <sstream>
#include <list>
#include "Nucleotide.h"
#include "NucleotideSequence.h"
#include "SequencePosition.h"
#include "ContigTable.h"
#include "zipbuffer.h"

/// Size, in chars, intended for fixed-size buffers that hold one raw line of input.
#define MAXLINESIZE 2000

namespace gbc {

/// \brief Base class for handling file formats.
///
/// Serves as a base for FastaFormat and GenbankFormat.
class FileFormat
  {
  protected:
    std::string FileName;                    ///< Name/path of the file to be parsed.
  public:
    enum compression_type {normal,gzip};     ///< Possible types of compression.
    compression_type compression;            ///< Compression in use; selects which buffer open(), close() and is_open() act on.
    std::istream file_handle;                ///< Stream handle, not necessarily a file.
    std::filebuf file_buffer;                ///< Default file buffer.
    gbc::zipbuffer compressed_buffer;        ///< Buffer for gzipped files.
    bool Contig_begin;                       ///< True if the last read was exactly at the beginning of a new contig.
    bool contig_capable;                     ///< Indicates if the file format has the capability of discriminating contigs.
    bool forward_reading;                    ///< Direction of reading (not in use).
    bool get_complementary;                  ///< Flags if we should get the complementary of a Nucleotide (default=false).
    SequencePosition Last_sequence_position; ///< The SequencePosition of the last successful reading, i.e., where a valid Nucleotide was found.
    std::stringstream the_line;              ///< Holds an entire line, filled by get_next_line().
    size_t the_line_begin;                   ///< Stream position recorded by get_next_line() just before reading the line.
    size_t the_line_end;                     ///< Stream position recorded by get_next_line() just after reading the line.
    ContigTable Contig_table;                ///< Table of detected or loaded contigs for the current file.
    bool Contig_mapping;                     ///< Flag which is true if we are mapping contigs.
    bool Verbose;                            ///< Be verbose about several actions (default=false).

  /// The void constructor: empty file name, uncompressed input, line positions at zero.
  FileFormat(void): FileName(), compression(normal), file_handle(&file_buffer),
    Contig_begin(false), contig_capable(false), forward_reading(true), get_complementary(false),
    the_line_begin(0), the_line_end(0),
    Contig_mapping(false), Verbose(false)
    {}

  /// \brief Constructor which takes the filename as argument.
  /// \param fn the file name, forwarded to file_name(const std::string&)
  FileFormat(std::string fn): compression(normal), file_handle(&file_buffer),
    Contig_begin(false), contig_capable(false), forward_reading(true), get_complementary(false),
    the_line_begin(0), the_line_end(0),
    Contig_mapping(false), Verbose(false)
    {file_name(fn);}

  /// The destructor.
  virtual ~FileFormat(void) {}

  /// \brief Sets the file name and selects the matching stream buffer.
  ///
  /// A name containing ".gz" switches FileFormat::file_handle to the gzip buffer.
  /// Any other name selects normal compression and, if the gzip buffer is the one
  /// currently attached, switches the handle back to FileFormat::file_buffer.
  // NOTE(review): ".gz" is matched anywhere in the name, not only as a suffix — confirm this is intended.
  inline void file_name(const std::string &fn) ///< the file name
    {
    FileName=fn;
    if (FileName.find(".gz") != std::string::npos)
      {
      compression=gzip;
      file_handle.rdbuf(&compressed_buffer);
      }
    else
      {
      compression=normal;
      // Undo an earlier switch to the gzip buffer; a buffer attached by someone else is left alone.
      std::streambuf *gzip_buffer=&compressed_buffer;
      if (file_handle.rdbuf() == gzip_buffer) {file_handle.rdbuf(&file_buffer);}
      }
    }

  /// Gets the stored file name
  inline std::string file_name(void) {return FileName;}

  /// Go back to the physical beginning of the file; also clears the stream state and resets Last_sequence_position.
  inline void rewind(void)
    {
    file_handle.clear();
    file_handle.seekg(std::streampos(0),std::ios_base::beg);
    Last_sequence_position=SequencePosition();
    }

 /// Go back to the logical beginning of the genome file (here simply rewind()).
  virtual inline void start_over(void)
    {
    rewind();
    }

  /// \brief Reads until the end of the line and throws away its content.
  ///
  /// The line is read into a std::string, so its length is not limited by a fixed buffer.
  inline void goto_next_line(void)
    {
    std::string discarded;
    std::getline(file_handle,discarded);
    }

  /// \brief Reads until the end of the line and places it into FileFormat::the_line.
  ///
  /// FileFormat::the_line is always emptied first. Unless eof() is already true, the
  /// stream positions before and after the read are stored in the_line_begin and
  /// the_line_end. The line is read into a std::string, so its length is not limited
  /// by a fixed buffer.
  inline void get_next_line(void)
    {
    the_line.clear(); the_line.str("");
    if (!eof())
      {
      std::string rawline;
      the_line_begin=tellg();
      std::getline(file_handle,rawline);
      the_line_end=tellg();
      the_line.str(rawline);
      the_line.seekg(0);
      }
    }

  /// \brief Searches the_line for a string.
  /// \return The position just past the end of the first match, or (streampos)0 if not found.
  ///
  /// The method allows to specify the start position in the_line which is useful if
  /// multiple searches of the same strings are intended.
  inline std::streampos find_in_line(const std::string &st,  ///< the target string
                                     const int start=0) ///< the position at which we start searching
    {
    const std::string line=the_line.str();
    const std::string::size_type found=line.find(st,start);
    if (found == std::string::npos) {return std::streampos(0);}
    return std::streampos(static_cast<std::streamoff>(found+st.length()));
    }

  /// \brief Searches the_line for any character out of a set.
  /// \return The index of the first matching character plus st.length(), or (streampos)0 if not found.
  ///
  /// The method allows to specify the start position in the_line which is useful if
  /// multiple searches of the same target chars are intended.
  // NOTE(review): only one character is matched, yet the whole length of st is added;
  // this looks copied from find_in_line. Kept as is because callers may rely on it — confirm.
  inline std::streampos find_any_in_line(const std::string &st,  ///< the set of target characters
                                         const int start=0) ///< the position at which we start searching
    {
    const std::string line=the_line.str();
    const std::string::size_type found=line.find_first_of(st,start);
    if (found == std::string::npos) {return std::streampos(0);}
    return std::streampos(static_cast<std::streamoff>(found+st.length()));
    }

  /// Set to ignore all contigs, only useful if followed by a select_contig afterwards.
  inline void ignore_all_contigs(void) {Contig_table.mark(false);}

  /// Selects the contig to ignore.
  inline void ignore_contig(const std::string &tp, ///< type of contig to ignore
                            const std::string &st) ///< the exact contig description
    {
    Contig_table.mark(false,tp,st);
    }

  /// Selects the contig to read.
  inline void select_contig(const std::string &tp, ///< type of contig to read
                            const std::string &st) ///< the exact contig description
    {
    Contig_table.mark(true,tp,st);
    }

  /// Checks the format of the input file and may retrieve a number of important parameters about the file.
  /// The base implementation performs no check and always returns true.
  virtual bool check_file_type(void) {return true;}

  /// Prepares the file for reading; the base implementation just calls rewind().
  virtual void prepare_reading(void) {rewind();}

  /// \brief Opens the file named by FileName using the buffer selected by FileFormat::compression.
  ///
  /// Any exception raised while opening is reported on std::cerr and the program
  /// terminates with exit(1). std::filebuf::open() signals failure by returning a null
  /// pointer rather than by throwing, so that case is turned into a std::ios::failure
  /// here; otherwise a missing plain file would go unnoticed.
  inline void open(void)
    {
    if (Verbose) {std::cout << "Trying to open " << FileName << " ...";}
    file_handle.clear();
    file_handle.exceptions(std::ios::failbit | std::ios::badbit);
    try
      {
      switch(compression)
        {
        case gzip:   compressed_buffer.zipopen(FileName); break;
        default:
        case normal:
          if (!file_buffer.open(FileName.c_str(),std::ios::in))
            {
            throw std::ios::failure("std::filebuf::open failed");
            }
        }
      }
    catch (const std::ios::failure& error) {std::cerr << error.what() << " while trying to open " << FileName << "(" << __FILE__ << " " << __LINE__ << ")" << std::endl; exit(1);}
    catch (const std::exception& error)    {std::cerr << error.what() << __FILE__ << __LINE__ << std::endl; exit(1);}
    catch (...)                            {std::cerr << "Unknown exception" << __FILE__ << __LINE__ << std::endl; exit(1);}
    file_handle.exceptions(std::ios::goodbit); // back to non-throwing reads
    if (Verbose) {std::cout << "done." << std::endl;}
    }

  /// Maps all contigs in a file and places them into FileFormat::Contig_table.
  /// This function does nothing in FileFormat, for actual implementations see FastaFormat or GenbankFormat.
  virtual void map_contigs(const std::string key=std::string()) ///< key may hold information about a specific feature, not implemented on all formats
    {}

  /// Checks if we are at the beginning of a new contig and sets Contig_begin=true if this is the case.
  /// Nothing is changed when Contig_table is empty.
  virtual void verify_contig_begin(void)
    {
    if (!Contig_table.empty())
      {
      Contig_begin=Contig_table.current()->Stream_section.begin()==tellg();
      if (Contig_begin && Verbose) {std::cout << "Contig starts at " << tellg() << std::endl;}
      }
    }

  /// Reads the file and places it into Nucleotide<char>.
  /// The base implementation does nothing.
  virtual void read(Nucleotide<char> &charnt) ///< the Nucleotide<char> where the result of the reading goes
    {}

  /// Reads the file through read() and converts the result into Nucleotide<_InternalTp>,
  /// copying the Good flag of the intermediate Nucleotide<char>.
  template<class _InternalTp>
  void file_read(Nucleotide<_InternalTp> &nt) ///< Nucleotide to be assigned to
    {
    Nucleotide<char> charnt;
    read(charnt);
    nt=charnt; nt.Good=charnt.Good;
    }

  /// \brief Check for the end of the file.
  /// \return true if the end of the file is reached.
  virtual inline bool eof(void) {return file_handle.eof();}

  /// Close the file, using the buffer selected by FileFormat::compression.
  inline void close(void)
    {
    switch(compression)
      {
      case gzip:   compressed_buffer.zipclose(); break;
      default:
      case normal: file_buffer.close();
      }
    }

  /// \brief Check for file opening
  /// \return true if the buffer selected by FileFormat::compression is open.
  inline bool is_open(void)
    {
    if (compression == gzip) {return compressed_buffer.is_open();}
    return file_buffer.is_open();
    }

  /// Returns the good() function of a file handle.
  inline bool good(void) {return file_handle.good();}

  /// \brief Get the stream position.
  /// \return the (streampos) of the file_handle.
  inline std::streampos tellg(void) {return file_handle.tellg();}

  /// \brief Reads one contig; not implemented in the base class.
  /// \return always 0 here, after printing a message on std::cerr.
  virtual size_t read_contig(std::string &sequence, std::string &comment)
    {
    std::cerr << "read_contig not implemented for this class\n";
    return 0;
    }

  /// \brief Reads one contig through read_contig() and stores it into a NucleotideSequence.
  /// \return whatever read_contig() returned.
  ///
  /// The sequence is cleared first and its window_size is set to the length of the contig read.
  template<class _Tp>
  inline size_t read_complete_contig(NucleotideSequence<_Tp> &sequence, std::string &comment)
    {
    std::string seq;
    const size_t read_something=read_contig(seq,comment);
    sequence.clear();
    sequence.window_size=seq.length();
    sequence << seq;
    return read_something;
    }

  };//class FileFormat
};//namespace gbc

#endif
