/**
 * @package de.atwillys.cc.app
 * @license BSD (simplified)
 * @author stfwi
 *
 * @ccflags: -Ipcre/include -Wno-long-long
 * @ldflags: pcre/lib/libpcrecpp.a pcre/lib/libpcre.a
 *
 * -----------------------------------------------------------------------------
 *
 * PCRE based text filter.
 *
 * -----------------------------------------------------------------------------
 * +++ BSD license header (You know that ...) +++
 * Copyright (c) 2013, StfWi
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met: (1) Redistributions
 * of source code must retain the above copyright notice, this list of conditions
 * and the following disclaimer. (2) Redistributions in binary form must reproduce
 * the above copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the distribution.
 * (3) Neither the name of atwillys.de nor the names of its contributors may be
 * used to endorse or promote products derived from this software without specific
 * prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 * -----------------------------------------------------------------------------
 */
#include "pcre.hh"
#include <iostream>
#include <string>
#include <vector>
#include <sstream>
// IS_NIX selects the POSIX select()/read() based stdin handling in main().
// It is set for Linux and for Apple systems with a Mach kernel. Both macros
// are tested with `defined` and combined with the logical `&&`, so that the
// result does not depend on the numeric value __MACH__ expands to.
#if defined(__linux__) || (defined(__APPLE__) && defined(__MACH__))
#define IS_NIX
#endif

// Program name and version (string literals, used in the help and error texts).
#define APP_NAME "pcref"
#define APP_VER  "1.0"

using namespace std;

/**
 * Prints help text
 */
void help()
{
  #define nl  << std::endl
  #define nl2 << std::endl << std::endl
  #define an << APP_NAME <<
  cerr
  << "NAME" nl2
  << "  " << APP_NAME nl2
  << "SYNOPSIS" nl2
  << "  " an " [-h|--help|-v] '<pattern1>' ['<pattern2>'] [...]" nl2
  << "DESCRIPTION" nl2
  << "  Perl Compatible Regular Expression text Filter." nl2
  << "  The program allows performing match-extract / search-replace operations" nl
  << "  with pattern known from PCRE (or Perl: stdout = stdin =~ <pattern>), where" nl
  << "  one of [`/`, `|`, `#`] can be chosen as pattern separator." nl2
  << "    `" an " 'm/pattern/'` or `" an " '/pattern/'`: Prints first match to" nl
  << "     stdout." nl2
  << "    `" an " 's/pattern/replace/': Replaces all occurrences ot the pattern with" nl
  << "    the replace text (accepted subexpression references are `\\1`,`\\2`, etc," nl
  << "    and `$1`,`$2` etc, both have the same meaning)." nl2
  << " Modifiers:" nl2
  << "   Modifiers are appended to the pattern as known from Perl / PCRE" nl
  << "   (`" an " /pattern/modifiers` or `" an "/pattern/replace/modifiers`)." nl2
  << "   `i`  Ignore case (as in Perl)." nl
  << "   `x`  Permit whitespaces and comments in the pattern (as in Perl)." nl
  << "   `m`  Multi line: `^` and `$` match start/end of the whole text (as in Perl)." nl
  << "   `s`  `.` matches newlines as well (as in Perl)." nl2
  << "   `1`  (Character 'one') Extract/replace only first match, not the whole text." nl
  << "   `$`  `$` matches only at the end (else normal dollar sign)." nl
  << "   `!`  Meaning of `*?` and `*` swapped (`*?` now consumes as much as possible)." nl
  << "   `*`  Disable parenthesise (subexpression) matching." nl
  << "   `X`  Extra (PCRE strict escape parsing)." nl
  << "   `U`  Disable UTF support." nl2
  << " Sequential execution:" nl2
  << "   You can specify multiple expressions as command line arguments, they" nl
  << "   will be processed sequentially, and the final result will be printed" nl
  << "   to stdout. E.g." nl2
  << "     echo 'ABC DEF YES' | " an " 's/ABC[\\s]?/X/' '/(\\w+)\\s(\\w+)/$1=$2/'" nl
  << "                                ( --> XDEF YES)    ( --> XDEF=YES)" nl2
  << " Examples:" nl2
  << "   - Remove tailing spaces of each line:" nl2
  << "     " an " 's/^(.*?)[\\s]+(\\n|$)/$1$2/m'" nl2
  << "   - Extract body from HTML:" nl2
  << "     " an " '|< [\\s]* body .*? > (.*?) <[\\s]* / [\\s]* body |$1|smix1'" nl2
  << "   - Section of an ini-file to json object:" nl2
  << "     " an " '/(.*)/\\n$1\\n/sm' \\" nl
  << "           '/.*? \\n \\[SECTION_NAME\\] [\\s]* (.*?) \\n (\\[|$) /$1/smix1' \\" nl
  << "           's/^([\\w]+) [\\s]* = [\\s]* (.*) ($|\\n)/$1: \"$2\"/imx' \\" nl
  << "           's#\\n#, #imx' \\" nl
  << "           'm|(.*)|{ $1 }|'" nl2
  << " Annotations:" nl2
  << "   - The replace function is global by default, as this is the most often" nl
  << "     used. You can switch it of to replace only one using the modifier `1`." nl2
  << "   - The match operation (optionally) takes a replace part to rearrange" nl
  << "     the matched string using subexpressions (`m/<pattern>/replace/mods`)," nl
  << "     so that the match operation is practically an extract operation." nl2
  << "   - Replace returns the input string if no pattern matches, extract an" nl
  << "     empty string if a pattern does not match." nl2
  << "   - The program always reads the complete text (to memory) before processing." nl
  << "     Hence, large texts cause a higher memory consumption." nl2
  << "   - On error the program does not return any text to stdout." nl2
  << "   - The program understands common escape sequences in the replace text:" nl
  << "     \\n, \\r, \\t, \\v, \\f, \\a, \\b." nl2
  << "ARGUMENTS" nl2
  << "  -h,  --help     Show this help" nl2
  << "  -v,  --verbose  Increased verbosity (outputs to stderr)" nl2
  << "  -vv, --debug    High verbosity (debug information if compiled with)" nl2
  << "  <pattern>       A perl compatible regex pattern as described above." nl2
  << "RETURN VALUES" nl2
  << "  returns 0 on success," nl
  << "          1 on error" nl2
  << "SEE ALSO" nl2
  << "  perlre, pcregrep, grep, egrep, sed, awk, ex" nl2
  << APP_NAME << " v" << APP_VER << ", stfwi; credits to libpcre author(s)." nl
  ;;
  #undef nl
  #undef nl2
  #undef an
}

typedef std::string str_t;
typedef std::vector<sw::pcre_regex> pcre_vector;

/**
 * Main
 * @param int argc
 * @param char** argv
 * @return int
 */
int main(int argc, char** argv)
{
  try {

    // Command line arguments
    if(argc < 2) throw "No expression given (try " APP_NAME " --help)";
    str_t s;
    pcre_vector rx;
    int verbosity = 0;

    // Command line first arg (the very rudimentary way ...)
    int i=1;
    if(argc > 1 && argv[1]) {
      str_t arg = argv[1];
      if(arg == "-h" || arg == "--help") {
        help();
        return 1;
      } else if(arg == "-v" || arg == "--verbose") {
        verbosity = 1;
        i++;
      } else if(arg == "-vv" || arg == "-v2" || arg == "--debug") {
        verbosity = 2;
        i++;
      }
    }

    // Assign and parse patterns before dealing with the text
    for(; i<argc && argv[i]; i++) {
      rx.push_back(sw::pcre_regex(argv[i]));
      if(!rx.back().ok()) {
        s = "Expression "; // ref s existing in main()
        if(i>10) s.push_back('0'+(i/10));
        s.push_back('0'+(i%10));
        s += ": ";
        s += rx.back().error();
        throw s;
      }
    }

    #ifdef IS_NIX
    fd_set fds; struct timeval t; t.tv_sec = 2; t.tv_usec = 0;
    FD_ZERO(&fds); FD_SET(STDIN_FILENO, &fds);
    if(select(2, &fds, NULL, NULL, &t) <= 0 || !FD_ISSET(STDIN_FILENO, &fds)) {
      throw "Pipe in your text data.";
    }
    int n = 0; char buf[512]; buf[511] = '\0';
    while((n=::read(STDIN_FILENO, buf, 511)) > 0) { buf[n]='\0'; s += buf; }
    if(n!=0) throw "Failed to read from stdin";
    #else
    s.clear();
    char c; while(cin.get(c)) s += c;
    #endif

    // Verbose: print before applying expressions
    if(verbosity > 0) {
      for(unsigned i=0; i<rx.size(); i++) {
        cerr << "Expression " << ((int)(i+1)) << ": " << rx[i];
      }
    }

    for(unsigned i=0; i<rx.size(); i++) {
      sw::pcre_regex &re = rx[i];
      if(!re(s).ok()) {
        s.clear(); // reassign s, output no more valid.
        s.reserve(32);
        s = "Expression ";
        if(i>10) s.push_back('0'+(i/10));
        s.push_back('0'+(i%10));
        s += ": ";
        s += re.error();
        throw s;
      }
    }

    cout << s;
  } catch(const str_t &e) {
    cerr << "Error: " << e << endl;
    return 1;
  } catch(const char *e) {
    cerr << "Error: " << e << endl;
    return 1;
  }
  return 0;
}
