// ライセンス: GPL2

//#define _DEBUG
#include "jddebug.h"

#include "jdregex.h"
#include "miscutil.h"

#ifdef HAVE_MIGEMO_H
#include "jdmigemo.h"
#endif


constexpr std::size_t MAX_TARGET_SIZE = 64 * 1024;  // Buffer size for full-width/half-width conversion


using namespace JDLIB;

/** @brief Construct a pattern by forwarding all arguments to set().
 *
 * @details The boolean result of set() is discarded here; the message of a
 * failed compilation is available through errstr().
 */
RegexPattern::RegexPattern( const std::string& reg, const bool icase, const bool newline,
                            const bool usemigemo, const bool wchar, const bool norm )
{
    // A constructor cannot return the success flag, so it is dropped explicitly.
    static_cast<void>( set( reg, icase, newline, usemigemo, wchar, norm ) );
}


/** @brief Destructor: releases whatever clear() releases (compiled regex and error). */
RegexPattern::~RegexPattern() noexcept
{
    this->clear();
}


/** @brief Move constructor: takes over the compiled regex, flags and error of @p other.
 *
 * @details After the move @p other holds no regex, is marked as not compiled and
 * carries no error, so destroying or clearing it releases nothing.
 * @param[in,out] other pattern to move from
 */
RegexPattern::RegexPattern( RegexPattern&& other ) noexcept
    : m_regex{ other.m_regex }
    , m_compiled{ other.m_compiled }
    , m_newline{ other.m_newline }
    , m_wchar{ other.m_wchar }
    , m_norm{ other.m_norm }
    , m_error{ other.m_error }
{
    // Do not leave a dangling pointer behind: code that reads m_regex without
    // consulting m_compiled must see nullptr rather than a regex owned by *this.
    other.m_regex = nullptr;
    other.m_compiled = false;
    other.m_error = decltype( m_error ){};
}


/** @brief Move assignment: releases the current pattern, then takes over @p other.
 *
 * @details Self-assignment is a no-op. After the move @p other holds no regex,
 * is marked as not compiled and carries no error.
 * @param[in,out] other pattern to move from
 * @return reference to this object
 */
RegexPattern& RegexPattern::operator=( RegexPattern&& other ) noexcept
{
    if( this != &other ) {
        // Release our own regex and error before overwriting the handles.
        clear();

        m_regex = other.m_regex;
        m_compiled = other.m_compiled;
        m_newline = other.m_newline;
        m_wchar = other.m_wchar;
        m_norm = other.m_norm;
        m_error = other.m_error;

        // Ownership has moved; do not leave a dangling pointer in the source.
        other.m_regex = nullptr;
        other.m_compiled = false;
        other.m_error = decltype( m_error ){};
    }
    return *this;
}


/** @brief Release the compiled regex and any stored error.
 *
 * @details The regex is unreferenced only when m_compiled is set, because that
 * flag is what records that this object holds a successfully compiled regex.
 * The pointer itself is always reset so that no stale value survives a clear.
 */
void RegexPattern::clear()
{
    if( m_compiled ) {
        g_regex_unref( m_regex );
    }
    // Reset unconditionally: a non-compiled object must never expose an old pointer.
    m_regex = nullptr;
    m_compiled = false;
    g_clear_error( &m_error );
}


/** @brief 正規表現パターンを設定する
 *
 * @details 空文字列のパターンは作成しない。
 * @param[in] reg       正規表現パターン
 * @param[in] icase     大文字小文字の区別をしない
 * @param[in] newline   `.`(任意の1文字)に改行をマッチさせない
 * @param[in] usemigemo migemo使用 (コンパイルオプションで指定する必要あり)
 * @param[in] wchar     全角半角の区別をしない
 * @param[in] norm      Unicode互換文字の区別をしない
 * @return 有効な正規表現パターンはtrue、無効なパターンはfalse
 * @n reg が空文字列のときはfalse
 */
bool RegexPattern::set( const std::string& reg, const bool icase, const bool newline,
                        const bool usemigemo, const bool wchar, const bool norm )
{
#ifdef _DEBUG
    if( wchar ){
        std::cout << "RegexPattern::set " << reg << std::endl;
    }
#endif

    clear();

    if( reg.empty() ) return false;

    int cflags = G_REGEX_OPTIMIZE;
    if( newline ) cflags |= G_REGEX_MULTILINE;
    else cflags |= G_REGEX_DOTALL; // . を改行にマッチさせる
    if( icase ) cflags |= G_REGEX_CASELESS;

    m_newline = newline;
    m_wchar = wchar;
    m_norm = norm;

    const char* asc_reg = reg.c_str();
    std::string target_asc;

    // Unicode正規化
    if( m_norm ) {
        target_asc.reserve( reg.size() * 2 );
        MISC::norm( asc_reg, target_asc );
        asc_reg = target_asc.c_str();
    }

    // 全角英数字 → 半角英数字、半角カナ → 全角カナ
    else if( m_wchar && MISC::has_widechar( asc_reg ) ) {

        target_asc.reserve( MAX_TARGET_SIZE );
        std::vector<int> temp;
        MISC::asc( asc_reg, target_asc, temp );
        asc_reg = target_asc.c_str();

#ifdef _DEBUG
        std::cout << target_asc << std::endl;
#endif
    }

#ifdef HAVE_MIGEMO_H
    std::string migemo_regex;

    if( usemigemo ) {

        migemo_regex = jdmigemo::convert( asc_reg );
        if( ! migemo_regex.empty() ) {
            asc_reg = migemo_regex.c_str();
        }
    }
#endif

    m_regex = g_regex_new( asc_reg, GRegexCompileFlags( cflags ), GRegexMatchFlags( 0 ), &m_error );
    if( ! m_regex ) {
        return false;
    }

    m_compiled = true;
    return true;
}


/** @brief Return the message of the stored error.
 *
 * @return the message of m_error, or an empty string when no error is stored
 */
std::string RegexPattern::errstr() const
{
    if( ! m_error ) return {};

    return m_error->message;
}


///////////////////////////////////////////////


/** @brief Match a compiled pattern against a string and store the captured groups.
 *
 * @details Results of a previous call (m_pos, m_results, m_target_asc, m_table_pos)
 * are discarded first. Each group's position is stored in m_pos and its text,
 * taken from the unconverted @p target, in m_results.
 * @param[in] creg   compiled pattern
 * @param[in] target string to search
 * @param[in] offset byte offset in @p target at which matching starts
 * @param[in] notbol sets G_REGEX_MATCH_NOTBOL
 * @param[in] noteol sets G_REGEX_MATCH_NOTEOL
 * @return true when the pattern matched; false when it did not, when the
 * pattern is not compiled, when @p target is empty, or when @p offset is not
 * inside @p target
 */
bool Regex::match( const RegexPattern& creg, const std::string& target,
                   const std::size_t offset, const bool notbol, const bool noteol )
{
    m_pos.clear();
    m_results.clear();
    m_target_asc.clear();
    m_table_pos.clear();

    if ( ! creg.m_compiled ) return false;

    if( target.empty() ) return false;
    if( target.size() <= offset ) return false;

    const char* asc_target = target.c_str() + offset;

    // Unicode normalization
    if( creg.m_norm ) {
        if( m_target_asc.capacity() < target.size() * 2 ) {
            m_target_asc.reserve( target.size() * 2 );
            m_table_pos.reserve( target.size() * 2 );
        }
        MISC::norm( asc_target, m_target_asc, &m_table_pos );
        asc_target = m_target_asc.c_str();
    }

    // Full-width alphanumerics -> half-width alphanumerics, half-width kana -> full-width kana
    else if( creg.m_wchar && MISC::has_widechar( asc_target ) ) {

#ifdef _DEBUG
        std::cout << "Regex::match offset = " << offset << std::endl;
        std::cout << target << std::endl;
#endif

        if( m_target_asc.capacity() < MAX_TARGET_SIZE ) {
            m_target_asc.reserve( MAX_TARGET_SIZE );
            m_table_pos.reserve( MAX_TARGET_SIZE );
        }

        MISC::asc( asc_target, m_target_asc, m_table_pos );
        asc_target = m_target_asc.c_str();

#ifdef _DEBUG
        std::cout << m_target_asc << std::endl;
#endif
    }

    GMatchInfo* pmatch{};

    int eflags = 0;
    if( notbol ) eflags |= G_REGEX_MATCH_NOTBOL;
    if( noteol ) eflags |= G_REGEX_MATCH_NOTEOL;

    // pmatch is freed on both the failure path and the success path.
    if( ! g_regex_match( creg.m_regex, asc_target, GRegexMatchFlags( eflags ), &pmatch ) ) {
        g_match_info_free( pmatch );
        return false;
    }
    // NOTE(review): the loop visits one index more than the reported match count;
    // an index whose position cannot be fetched is stored as -1 with an empty
    // string below - confirm that the extra entry is intended.
    const int match_count = g_match_info_get_match_count( pmatch ) + 1;

    for( int i = 0; i < match_count; ++i ){

        int so;
        int eo;
        if( ! g_match_info_fetch_pos( pmatch, i, &so, &eo ) ) so = eo = -1;

        // Group without a usable position: keep the vectors aligned with an empty result.
        if( so < 0 || eo < 0 ) {
            m_pos.push_back( so );
            m_results.push_back( std::string() );
        }

        else {
            // m_table_pos is non-empty only when the target was converted above.
            // It appears to map a byte index of the converted text back to a byte
            // index of the unconverted text, with negative entries for indices
            // that have no position of their own - inferred from the lookups
            // below; confirm against MISC::norm / MISC::asc.
            if( ! m_table_pos.empty() ) {
#ifdef _DEBUG
                std::cout << "so=" << so << " eo=" << eo;
#endif
                // Start: step back to the nearest entry that is not negative.
                while( so > 0 && m_table_pos[so] < 0 ) so--;
                so = m_table_pos[so];
                // End: step forward to the next non-negative entry, or use the table size.
                auto it = std::find_if( m_table_pos.cbegin() + eo, m_table_pos.cend(), []( int p ) { return p >= 0; } );
                eo = ( it != m_table_pos.cend() ) ? *it : m_table_pos.size();
#ifdef _DEBUG
                std::cout << " -> so=" << so << " eo=" << eo << std::endl;
#endif
            }
            // Positions were relative to target + offset; make them relative to target.
            so += offset;
            eo += offset;

            m_pos.push_back( so );
            m_results.push_back( target.substr( so, eo - so ) );
        }
    }

    g_match_info_free( pmatch );

    return true;
}


bool Regex::match( const RegexPattern& creg, const std::string& target,
                   const std::size_t offset, const bool notbol, const bool noteol,
                   JDLIB::span<const std::string> named_captures )
{
    m_named_numbers.clear();
    for( const std::string& name : named_captures ) {
        const int num = g_regex_get_string_number( creg.m_regex, name.c_str() );
        if( num != -1 ) m_named_numbers.emplace( name, num );
    }

    return match( creg, target, offset, notbol, noteol );
}


/**
 * @brief Build a replacement string from the groups captured by the last match.
 *
 * @details A backslash followed by a digit 0-9 is replaced by the text of that
 * group; a group that does not exist or did not take part in the match
 * contributes nothing. A backslash followed by any other character yields
 * that character alone. A lone backslash at the very end of @p repstr is
 * copied literally.
 * @param[in] repstr replacement template
 * @return the expanded string; @p repstr itself when it is empty
 */
std::string Regex::replace( const std::string& repstr ) const
{
    if( repstr.empty() ) return repstr;

    const char* const begin = repstr.c_str();
    const char* const end = begin + repstr.size();
    const char* p0 = begin;
    const char* p1;
    std::string str_out;

    while( ( p1 = strchr( p0, '\\' ) ) != nullptr ) {
        // Copy the literal text in front of the backslash.
        str_out.append( p0, p1 - p0 );

        // Trailing backslash with nothing after it: skipping two characters
        // would move past the terminator, so keep it literally and stop.
        if( p1 + 1 >= end ) {
            str_out.push_back( '\\' );
            p0 = end;
            break;
        }

        const int n = p1[1] - '0';
        p0 = p1 + 2;
        if( n < 0 || n > 9 ) {
            // Not a group reference: emit the escaped character itself.
            str_out.push_back( p1[1] );
        }
        else if( m_results.size() > static_cast<std::size_t>( n ) && m_pos[n] != -1 ){
            str_out.append( m_results[n] );
        }
    }

    // Append whatever follows the last processed escape.
    str_out.append( repstr, p0 - begin, std::string::npos );

    return str_out;
}


/** @brief Length in bytes of the text captured by group @p num.
 *
 * @param[in] num group number
 * @return size of the stored result, or 0 when no such group was stored
 */
int Regex::length( std::size_t num ) const noexcept
{
    return ( num < m_results.size() ) ? static_cast<int>( m_results[num].size() ) : 0;
}


/** @brief Stored start position of group @p num.
 *
 * @param[in] num group number
 * @return the recorded position, or -1 when no such group was stored
 */
int Regex::pos( std::size_t num ) const noexcept
{
    if( num >= m_results.size() ) return -1;

    return m_pos[num];
}


/** @brief Text captured by group @p num.
 *
 * @param[in] num group number
 * @return copy of the stored result, or an empty string when no such group was stored
 */
std::string Regex::str( std::size_t num ) const
{
    if( num >= m_results.size() ) return {};

    return m_results[num];
}


/**
 * @brief Return the text of a named capture when the name was resolved, otherwise
 * the text of a numbered group.
 *
 * Returns an empty string when neither yields a stored result, including the
 * case where the resolved group number has no entry in the current results
 * (for example after a failed match).
 * @param[in] name group name
 * @param[in] fallback_num group number used when the name was not resolved
 * @return the captured text, or an empty string
 */
std::string Regex::named_or_num( const std::string& name, std::size_t fallback_num ) const
{
    const auto it = m_named_numbers.find( name );
    // Go through str() so that a group number beyond the stored results is
    // answered with an empty string instead of an out-of-range access.
    if( it != m_named_numbers.end() ) return str( static_cast<std::size_t>( it->second ) );

    return str( fallback_num );
}
