/**
 * @package de.atwillys.cc.swl.util
 * @license BSD (simplified)
 * @author Stefan Wilhelm (stfwi)
 *
 * @file csv_reader.hh
 * @ccflags
 * @ldflags
 * @platform linux, bsd, windows
 * @standard >= c++98
 *
 * -----------------------------------------------------------------------------
 *
 * Template based CSV reader.
 *
 * Parses a CSV file (or istream) into a nested STL data container, by default
 * a "vector of vector of the datatype you like". The outer vector represents the
 * rows, the inner the columns. The element/field data type is always specified
 * by you.
 *
 * - Does not throw exceptions, instead provides boolean return variables and
 *   the getter `error()`.
 *
 * - Provides head line parsing.
 *
 * - You can choose if the file definitely has a header, definitely does not
 *   have a header or "auto" - means the parser tries to convert the CSV field
 *   string into the element type that you specified as template argument. If
 *   this fails in the first line it assumes the first line is header.
 *
 * - The number of columns is dynamic - means the parser does not care if the
 *   rows have different amount of separators. Provides method `data_size_consistent()`
 *   if you want to check if all rows have the same number of columns (including
 *   the header).
 *
 * - Detected column separators are `,` and `;`
 *
 * - Detected row separators are CR and LF.
 *
 * - Empty lines are ignored and not added to the result container
 *
 * - Accepts double quotes (`"`) to wrap strings that may contain separators,
 *   but no single quotes.
 *
 * - In double quotes wraps, a quote that shall be interpreted as text can be
 *   escaped using a double-double-quote (`""`) or with a backslash (`\"`).
 *   The backslashes before a quote can be escaped as well (`\\"`) to specify
 *   that the recognised character shall be a backslash, not a quote.
 *   Backslashes that are not before a quote don't need to be escaped (means
 *   `" a=\t " ," b=\\ "` are unmodified, whereas `"\\","\""` results in
 *   `\` and `"`).
 *
 * - The output can be fetched as reference or const reference using the
 *   method `data()`.
 *
 * - Element converters (string to field data type) for primitive types
 *   provided (int, long, char, double, float). Other data types are converted
 *   using input streams (stream operator >>). For a class data type you may
 *   need to implement the stream operator `>>` OR add a converter function
 *   template to the namespace `sw::strcast` (see below).
 *
 *  --------------------------------------------------------------------------
 *
 * - Simple file parse example:
 *
 *  sw::csv_reader<string> csv(argv[1]); // File is immediately parsed
 *
 *  // Errors ? --> use switch case for details
 *  if(csv.error()) {
 *    cout << "Warning: There are parse errors." << endl;
 *  }
 *
 *  // All rows have the same num of columns?
 *  if(!csv.data_size_consistent()) {
 *    cout << "Warning: Data size not consistent." << endl;
 *  }
 *
 *  // Has a header? --> Print it
 *  if(csv.has_header()) {
 *    for(size_t i=0; i<csv.header().size(); ++i) {
 *      cout << "\"" << csv.header()[i] << "\" ";
 *    }
 *  }
 *
 *  // Print data, columns space separated, rows newline separated.
 *  for(size_t i=0; i<csv.data().size(); ++i) {
 *    for(size_t j=0; j<csv.data()[i].size(); ++j) {
 *      cout << csv.data()[i][j] << " ";
 *    }
 *   cout << endl;
 *  }
 *  --------------------------------------------------------------------------
 *
 * - Simple stream/string parse example, elements are double:
 *
 *  // The stream,
 *  stringstream in_stream("x,y,z \n 1,2,3 \n 4,5,6 \n 7,8,9");
 *
 *  // CSV reader object
 *  sw::csv_reader<double> csv;
 *
 *  // Some settings ...
 *  csv.abort_on_error(false);  // Continue even if a field cannot be parsed as double
 *  csv.with_header(true);      // We know we have a header.
 *
 *  // Parse ...
 *  in_stream >> csv;           // That's it already.
 *
 *  // The rest is like the example above.
 *  if(csv.error()) {
 *  [...]
 *
 *  --------------------------------------------------------------------------
 *
 *  - Element data conversion: If your cell/element type is a class, simply
 *    implement an istream operator for it, e.g.:
 *
 *      // The class
 *      struct my_point_class { double x,y; };
 *
 *      // That will read two values separated by some character, like
 *      // "1.0/2.0" or "10,20" into the instance variables x and y.
 *      inline std::istream& operator >> (std::istream &is, my_point_class &p)
 *      { char c; return is >> p.x >> c >> p.y; }
 *
 *   OR implement a "string-to-your-type" function template:
 *
 *   namespace sw  { namespace strcast {
 *     template <>
 *     inline bool cast<my_point_class>(const std::string &s, my_point_class& p)
 *     { +++ parse s and transfer to p.x and p.y +++ }
 *   }}
 *
 * --------------------------------------------------------------------------
 * +++ BSD license header +++
 * Copyright (c) 2008-2014, Stefan Wilhelm (stfwi, <cerberos@atwillys.de>)
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met: (1) Redistributions
 * of source code must retain the above copyright notice, this list of conditions
 * and the following disclaimer. (2) Redistributions in binary form must reproduce
 * the above copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the distribution.
 * (3) Neither the name of atwillys.de nor the names of its contributors may be
 * used to endorse or promote products derived from this software without specific
 * prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 * --------------------------------------------------------------------------
 */
#ifndef SW_CSV_READER_HH
#define	SW_CSV_READER_HH

// <editor-fold desc="preprocessor" defaultstate="collapsed">
#include <cctype>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <vector>
// Fallback `round()` for old Microsoft compilers. The MSVC version macro is
// `_MSC_VER` (single leading underscore); releases before Visual Studio 2013
// (_MSC_VER 1800) do not ship the C99 `round()` function. Rounds half-way
// cases away from zero, like the C99 function does.
#if defined(_MSC_VER) && (_MSC_VER < 1800)
namespace { template <typename T> T round(T d) { return (d < T(0)) ? ceil(d-.5) : floor(d+.5); } }
#endif
#define NaN (std::numeric_limits<double>::quiet_NaN())
#ifdef CSV_DEBUG
#define CSV_DBG(X) std::cerr << X << std::endl;
#else
#define CSV_DBG(X)
#endif
// </editor-fold>

// <editor-fold desc="auxiliary string casts" defaultstate="collapsed">
namespace sw { namespace strcast {

  /**
   * Generic conversion: string to any type that provides an istream
   * extraction operator (`operator >>`).
   *
   * The conversion is successful if the extraction itself did not fail.
   * Reaching the end of the string during a successful extraction is fine,
   * but a failed extraction at the end of the input (e.g. an empty string)
   * is reported as an error.
   *
   * @param const std::string & s
   * @param value_type & d
   * @return bool
   */
  template <typename value_type>
  inline bool cast(const std::string &s, value_type& d)
  { std::istringstream is(s); is >> d; return !is.fail(); }

  /**
   * String passthrough, always succeeds.
   * @param const std::string & s
   * @param std::string & d
   * @return bool
   */
  template <>
  inline bool cast<std::string>(const std::string &s, std::string &d)
  { d=s; return true; }

  /**
   * String to double. Leading white space is skipped, characters after the
   * number are ignored. Returns false and sets the value to NaN if no number
   * could be parsed at all.
   * @param const std::string & s
   * @param double & d
   * @return bool
   */
  template <>
  inline bool cast<double>(const std::string &s, double& d)
  {
    const char *ps = s.c_str();
    // The <cctype> functions require a value representable as unsigned char.
    while(std::isspace(static_cast<unsigned char>(*ps))) ++ps;
    char *pe = 0;
    d = std::strtod(ps, &pe);
    if(pe <= ps) {
      // strtod() did not consume any character: not a number.
      d = std::numeric_limits<double>::quiet_NaN();
      return false;
    }
    return true;
  }

  /**
   * String to float, parsed as double and narrowed. On error the value is
   * NaN and false is returned.
   * @param const std::string & s
   * @param float & d
   * @return bool
   */
  template <>
  inline bool cast<float>(const std::string &s, float &d)
  { double dd; const bool isok = cast<double>(s, dd); d = static_cast<float>(dd); return isok; }

  /**
   * String to long. The text is parsed as double and rounded to the nearest
   * integer. On error the value is set to 0 and false is returned. The range
   * of the parsed number is not checked.
   * @param const std::string & s
   * @param long & d
   * @return bool
   */
  template <>
  inline bool cast<long>(const std::string &s, long &d)
  { double dd; if(!cast<double>(s, dd)) { d=0; return false; } d=static_cast<long>(round(dd)); return true; }

  /**
   * String to int. The text is parsed as double and rounded to the nearest
   * integer. On error the value is set to 0 and false is returned. The range
   * of the parsed number is not checked.
   * @param const std::string & s
   * @param int & d
   * @return bool
   */
  template <>
  inline bool cast<int>(const std::string &s, int &d)
  { double dd; if(!cast<double>(s, dd)) { d=0; return false; } d=static_cast<int>(round(dd)); return true; }

  /**
   * String to char. The text is interpreted as a NUMBER (not as a character),
   * parsed as double and rounded, e.g. "65" yields the character code 65.
   * On error the value is set to 0 and false is returned. The range of the
   * parsed number is not checked.
   * @param const std::string & s
   * @param char & d
   * @return bool
   */
  template <>
  inline bool cast<char>(const std::string &s, char &d)
  { double dd; if(!cast<double>(s, dd)) { d=0; return false; } d=static_cast<char>(round(dd)); return true; }

}}
// </editor-fold>

// <editor-fold desc="csv_reader" defaultstate="collapsed">
namespace sw {

/**
 * Parses a CVS file into a vector<vector<Template data type> >, where the
 * outer vector represents the rows and the inner vector the columns/fields/cells.
 *
 */
template <typename elem_type>
class csv_reader
{
public:

  /**
   * Character type
   * SW, NOTE: Fixed char, can be included in template later
   */
  typedef char char_type;

  /**
   * The type of this template class
   */
  typedef csv_reader<elem_type> my_type;

  /**
   * String type
   */
  typedef typename std::basic_string<char_type> string_type;

  /**
   * Header container + element type
   */
  typedef typename std::vector<string_type> header_type;

  /**
   * Type of columns
   */
  typedef typename std::vector<elem_type> col_type;

  /**
   * Type of rows
   */
  typedef typename std::vector<col_type> row_type;

  /**
   * Error codes
   * @enum error_type
   */
  typedef enum { ok=0, stream_error, conversion_error } error_type;

  /**
   * CVS file header detection
   * @enum header_type
   */
  typedef enum { header_no=0, header_yes=1, header_auto } header_sel_type;

  /**
   * Standard constructor
   */
  csv_reader() : break_err_(true), with_header_(header_auto)
  { clear(); }

  /**
   * Settings constructor
   * @param bool abort_on_err
   * @param header_sel_type with_header=header_auto
   */
  csv_reader(bool abort_on_err, header_sel_type with_header=header_auto,
      string_type comment_chars="") :
    break_err_(abort_on_err), with_header_(with_header),
    cmnt_chrs_(comment_chars)
  { clear(); }

  /**
   * Settings constructor
   * @param bool abort_on_err
   * @param header_sel_type with_header=header_auto
   */
  csv_reader(bool abort_on_err, bool withheader) : break_err_(abort_on_err)
  { clear(); with_header(withheader); }

  /**
   * File constructor, immediately parses the file.
   * @param const string_type &file
   * @param bool abort_on_err=true
   * @param header_sel_type with_header=header_auto
   * @param string_type comment_chars=""
   */
  csv_reader(const string_type &file, bool abort_on_err=true,
    header_sel_type with_header=header_auto, string_type comment_chars="") :
    break_err_(abort_on_err), with_header_(with_header), cmnt_chrs_(comment_chars)
  { readfile(file); }


  /**
   * File constructor, immediately parses the file.
   * @param const string_type &file
   * @param bool abort_on_err=true
   * @param header_sel_type with_header=header_auto
   */
  csv_reader(const char_type *file, bool abort_on_err=true,
    header_sel_type with_header=header_auto, const char_type *comment_chars="") :
    break_err_(abort_on_err), with_header_(with_header), cmnt_chrs_(comment_chars)
  { readfile(string_type(file)); }

  /**
   * Destructor
   */
  virtual ~csv_reader()
  { ; }

  /**
   * Returns a reference to the parsed data
   * @return data_type &
   */
  inline row_type & data() throw()
  { return data_; }

  /**
   * Returns a const reference to the parsed data
   * @return data_type &
   */
  inline const row_type & data() const throw()
  { return data_; }

  /**
   * Returns a const reference to the parsed data
   * @return const data_type &
   */
  inline const row_type & operator()() const throw()
  { return data_; }

  /**
   * Returns a reference to the parsed header
   * @return header_type &
   */
  inline header_type & header() throw()
  { return header_; }

  /**
   * Returns a const reference to the parsed header
   * @return const header_type &
   */
  inline const header_type & header() const throw()
  { return header_; }

  /**
   * Returns true if a header was parsed
   * @return bool
   */
  inline bool has_header() const throw()
  { return has_header_; }

  /**
   * Returns the error code, code "ok" = 0
   * @return error_type
   */
  inline error_type error() const throw()
  { return error_; }

  /**
   * Returns true if there is an error.
   * @return bool
   */
  inline bool operator ! () const throw()
  { return error_ != ok; }

  /**
   * Returns the number of rows / data lines / "records"
   * @return size_t
   */
  inline size_t rows() const throw()
  { return data_.size(); }

  /**
   * Returns the number of columns (the maximal number of fields for all lines)
   * @return size_t
   */
  inline size_t cols() const throw()
  { return max_cols_; }

  /**
   * Returns the parsed file name/path
   * @return const string_type&
   */
  inline const string_type & file() const throw()
  { return file_; }

  /**
   * Returns true if the parser shall abort if an error occurs.
   * @return bool
   */
  inline bool abort_on_error() const throw()
  { return break_err_; }

  /**
   * Defines if the parser shall abort if an error occurs.
   * @param bool v
   * @return my_type&
   */
  inline my_type & abort_on_error(bool v) throw()
  {  break_err_ = v;  return *this; }

  /**
   * Returns if the parser shall look for a VSC header line.
   * @return header_sel_type
   */
  inline header_sel_type with_header() const throw()
  { return with_header_; }

  /**
   * Defines if the parser shall look for a VSC header line (boolean setter,
   * true=yes, false=no).
   * @param bool v
   * @return my_type&
   */
  inline my_type & with_header(bool v) throw()
  { with_header_ = v ? header_yes : header_no;  return *this; }

  /**
   * Defines if the parser shall look for a VSC header line.
   * @param header_sel_type v
   * @return my_type&
   */
  inline my_type & with_header(header_sel_type v) throw()
  { with_header_ = v;  return *this; }

  /**
   * Characters that are interpreted as line comment if they are the first
   * character in a line.
   * @return string_type
   */
  inline string_type comment_chars() const throw()
  { return cmnt_chrs_; }

  /**
   * Characters that are interpreted as line comment if they are the first
   * character in a line.
   * @param string_type chars
   * @return my_type&
   */
  inline my_type & comment_chars(const string_type & chars) throw()
  { cmnt_chrs_ = chars;  return *this; }

  /**
   * Returns true if all rows have the same number of columns.
   */
  inline bool data_size_consistent() const throw()
  { return min_cols_ == max_cols_ && (!has_header_ || header_.size() == min_cols_); }

  /**
   * Clears all data and associated instance variables. Setting variables
   * remain untouched.
   */
  inline void clear() throw()
  {
    header_.clear(); data_.clear(); file_.clear();
    error_ = ok;
    max_cols_ = min_cols_ = 0;
    has_header_ = false;
  }

  /**
   * Parse a file, return success.
   * @param string_type file
   * @return bool
   */
  inline bool load(string_type file)
  { return readfile(file); }

  /**
   * Parse a file, return success.
   * @param string_type file
   * @param bool abort_on_err
   * @return bool
   */
  inline bool load(string_type file, bool abort_on_err)
  { this->break_err_ = abort_on_err; return readfile(file); }

  /**
   * Parse a file, return success.
   * @param string_type file
   * @param bool abort_on_err
   * @param header_type with_header
   * @return bool
   */
  inline bool load(string_type file, bool abort_on_err, header_sel_type with_header)
  { break_err_ = abort_on_err; with_header_ = with_header; return readfile(file); }

  /**
   * Parse an input stream, return success.
   * @param std::istream &is
   * @return bool
   */
  inline bool load(std::istream &is)
  { return readstream(is); }

  /**
   * Parse an input stream, return success.
   * @param std::istream &is
   * @param bool abort_on_err
   * @return bool
   */
  inline bool load(std::istream &is, bool abort_on_err)
  { break_err_ = abort_on_err; return readstream(is); }

  /**
   * Parse an input stream, return success.
   * @param string_type file
   * @param bool abort_on_err
   * @param header_type with_header
   * @return bool
   */
  inline bool load(std::istream &is, bool abort_on_err, header_sel_type with_header)
  { break_err_ = abort_on_err; with_header_ = with_header; return readstream(is); }

protected:

  /**
   * Internal file load/parse
   * @param const string_type &file
   * @return bool
   */
  bool readfile(const string_type &file)
  {
    clear();
    file_ = file;
    std::ifstream fs;
    fs.open(file.c_str(), std::fstream::in);
    bool isok = readstream(fs);
    fs.close();
    return isok;
  }

  /**
   * Internal input stream parse. Does not clear to allow to stream in
   * multiple times.
   * @param std::istream &fs
   * @return bool
   */
  bool readstream(std::istream &fs)
  {
    string_type s;
    char c, qc = '\0';
    bool cl = false; // comment line
    std::vector<elem_type> cols;
    s.reserve(512); // faster
    has_header_ = data_.size()>0 || header_.size()>0 || (with_header_==header_no);

    while(fs.good() && isspace(fs.peek())) {
      fs.get();
    }

    if(fs.good() && (c=fs.peek())!=0) {
      cl = cmnt_chrs_.find_first_of(c) != string_type::npos;
    }

    while(fs.good() && fs.get(c)) {
      if(cl) {
        if(c == '\n' || c == '\r') {
          cl = ((c=fs.peek())!=0) && cmnt_chrs_.find_first_of(c) != string_type::npos;
          // Ignore space chars after new line
          while(fs.good() && ((c=fs.peek())=='\n' || c=='\r' || c==' ' || c == '\t')) {
            fs.get(c);
          }
        }
      } else if(qc) {
        if(qc != c) {
          s += c;
        } else if(fs.good() && fs.peek() == qc) { // double quote ("") escaped
          fs.get();
          s += qc;
        } else if(!s.empty() && s[s.length()-1] == '\\') { // backslash escaped
          s.erase(s.length()-1);
          if(s.length()<2 || s[s.length()-2] == '\\') {
            s += qc;
          }
        } else {
          qc = '\0';
        }
      } else if(c=='"') {
        qc = c;
      } else if(c=='\n' || c=='\r' || c==',' || c==';') {
        if(!s.empty() || c==',' || c==';') {
          if(!append_field(s, cols)) break;
        }
        if(c=='\n' || c=='\r') {
          append_row(cols);
          cl = ((c=fs.peek())!=0) && cmnt_chrs_.find_first_of(c) != string_type::npos;
          if(!cl) {
            // Ignore space chars after new line
            while(fs.good() && ((c=fs.peek())=='\n' || c=='\r' || c==' ' || c == '\t')) {
              fs.get(c);
            }
          }
        }
      } else if((c!=' ' && c != '\t') || s.length() > 0) { // leading spaces omitted
        s += c;
      }
    }

    if(fs.bad()) {
      CSV_DBG("STREAM ERROR");
      error_ = stream_error;
    }

    if(!s.empty()) {
      append_field(s, cols);
    }

    append_row(cols);

    if(with_header_ == header_no || header_.empty()) {
      has_header_ = false; // re-adjust
    }

    if(!has_header_) {
      header_.clear();
    } else {
      for(typename header_type::iterator it=header_.begin(); it!=header_.end(); ++it) {
        string_type &s = *it;
        for(size_t i=s.length()-1; i>0 && !s.empty(); i--) {
          if(isspace(s[i])) s.erase(i);
        }
      }
    }

    if((data_.size()) > 0) {
      min_cols_ = std::numeric_limits<size_t>::max();
      for(typename row_type::const_iterator it=data_.begin(); it!=data_.end(); ++it) {
        size_t cols = it->size();
        if(cols < min_cols_) min_cols_ = cols;
        if(cols > max_cols_) max_cols_ = cols;
      }
    }
    return error_ == ok;
  }

  /**
   * Internal field append. Return false === break parse loop.
   * @param s
   * @param cols
   * @return bool
   */
  inline bool append_field(string_type &s, col_type &cols)
  {
    if(data_.size() == 0 && !has_header_) {
      CSV_DBG("Append header field: '" << s << "'");
      header_.push_back(s);
      s = "";
      return true;
    }
    elem_type dst;
    bool r = strcast::cast<elem_type>(s, dst);
    cols.push_back(dst);
    if(!r) {
      CSV_DBG("Append data field: '" << s << "' CONCERSION ERROR");
      error_ = conversion_error;
      if(break_err_) return false;
    } else {
      CSV_DBG("Append data field: '" << s << "'");
    }
    s = "";
    return true;
  }

  /**
   *
   * @param std::vector<elem_type> &cols
   * @return bool
   */
  inline bool append_row(col_type &cols) {
    if(data_.size() == 0) {
      CSV_DBG("Append row, datasize=0: ncols='" << cols.size() << "'");
      if(!has_header_) {
        CSV_DBG("Append row, No header yet");
        has_header_ = true; // 1. Assume header, change this info below
        if(with_header_ != header_yes) { // try convert, header on error
          CSV_DBG("Append row, Check AUTO");
          bool isok=false;
          col_type tmp;
          for(typename header_type::const_iterator it=header_.begin(); it!=header_.end(); ++it) {
            elem_type dst;
            if(!(isok=strcast::cast<elem_type>(*it, dst))) break;
            tmp.push_back(dst);
          }
          if(isok) {
            CSV_DBG("Append row, Check AUTO: Looks like data");
            header_.clear();
            data_.push_back(tmp); // Conversion ok: Data are probably data
            has_header_ = false;
          } else {
            CSV_DBG("Append row, Check AUTO: Looks like header");
          }
        }
      }
      #ifdef CSV_DEBUG
      if(has_header_) {
        std::cerr << "Append row, Header:";
        for(unsigned k=0; k<header_.size(); k++) {
          std::cerr << header_[k] << " | ";
        }
        std::cerr << std::endl;
      }
      #endif
    }

    if(!cols.empty()) {
      data_.push_back(cols);
      cols.clear();
      CSV_DBG("Append row, Added data cols");
    } else {
      CSV_DBG("Append row, Ignore empty cols");
    }
    return true;
  }

protected:

  bool break_err_;
  header_sel_type with_header_;
  string_type file_;
  string_type cmnt_chrs_;

  row_type data_;
  error_type error_;
  size_t max_cols_;
  size_t min_cols_;
  header_type header_;
  bool has_header_;
};


/**
 * Stream extraction operator for class csv_reader: parses the content of the
 * input stream into the reader object by calling its `load()` method. The
 * success flag returned by `load()` is discarded here, the reader's `error()`
 * getter is available for checking the parse result.
 * @param std::istream &is
 * @param csv_reader<elem_type> &csvr_
 * @return std::istream&
 */
template <typename elem_type>
inline std::istream& operator >> (std::istream &is, csv_reader<elem_type> &csvr_)
{
  (void) csvr_.load(is);
  return is;
}

}
// </editor-fold>

// <editor-fold desc="undefs" defaultstate="collapsed">
#undef CSV_DBG
#undef NaN
// </editor-fold>

#endif
