import {
    MAX_KEYWORD_LENGTH,
    MIN_KEYWORD_LENGTH,
    SPECIFIC_LETTER_UNICODES,
} from '../../constants/constants';
import { adjustPatternForBrackets, getWordBoundaryRegex } from './regexHelpers';

/**
 * Checks whether a keyword, ignoring surrounding whitespace, has an allowed length.
 *
 * @param {string} [keyword=''] - Raw keyword text.
 * @returns {boolean} `true` when the trimmed length lies within
 *     [MIN_KEYWORD_LENGTH, MAX_KEYWORD_LENGTH], both bounds inclusive.
 */
export const isKeywordLengthValid = (keyword = '') => {
    const { length } = keyword.trim();
    const isLongEnough = length >= MIN_KEYWORD_LENGTH;
    const isShortEnough = length <= MAX_KEYWORD_LENGTH;

    return isLongEnough && isShortEnough;
};

/**
 * Builds the validation message shown when a keyword's length is out of range.
 *
 * @returns {string} Message that embeds MIN_KEYWORD_LENGTH and MAX_KEYWORD_LENGTH.
 */
export const getKeywordLengthErrorMessage = () =>
    `Keyword must be between ${MIN_KEYWORD_LENGTH} and ${MAX_KEYWORD_LENGTH} characters long`;

/**
 * Returns the validation message shown when a URL value is not valid.
 *
 * @returns {string} The fixed, user-facing error text.
 */
export const getKeywordUrlErrorMessage = () =>
    'Please enter a valid URL (e.g. https://www.google.com)';

/**
 * Removes duplicate match entries, keeping the first occurrence of each.
 *
 * Two entries are duplicates when their element `0` and their `index` property
 * produce the same `"<text>-<index>"` key.
 *
 * @param {Array} [arrayItems=[]] - Entries exposing `[0]` and `.index`.
 * @returns {Array} New array with the surviving entries in their original order.
 */
const getUniqueElements = (arrayItems = []) => {
    const knownKeys = new Set();

    return arrayItems.filter((entry) => {
        const sizeBefore = knownKeys.size;

        // Adding an already-known key leaves the size untouched, which marks a duplicate.
        knownKeys.add(`${entry[0]}-${entry.index}`);

        return knownKeys.size > sizeBefore;
    });
};

/**
 * Adapts a regex source string written for Python so it can be compiled with `RegExp`.
 *
 * Conversions performed:
 *  - named groups `(?P<name>...)` become `(?<name>...)`;
 *  - the Python-only anchors `\A` / `\Z` become `^` / `$` (equivalent as long as the
 *    result is compiled without the `m` flag);
 *  - every key of SPECIFIC_LETTER_UNICODES is replaced by its mapped value.
 *
 * @param {string} pythonRegex - Regex source in Python syntax.
 * @returns {{ backendRegexString: string, isSpecificEnd: boolean } | *} The converted
 *     source plus a flag telling whether it contains any letter listed in
 *     SPECIFIC_LETTER_UNICODES. A falsy input is returned unchanged (not wrapped in an
 *     object), so callers that destructure the result must handle that case.
 */
const convertPythonRegexToJS = (pythonRegex) => {
    if (!pythonRegex) {
        return pythonRegex;
    }

    // Replace Python named capture groups (?P<name>...) with JS-style named groups (?<name>...)
    let jsRegex = pythonRegex.replace(/\(\?P<(\w+)>/g, '(?<$1>');

    // JS has no \A / \Z (nor \z): in a non-unicode RegExp they would match a literal letter.
    // Walking the pattern one escape pair at a time keeps `\\Z` (an escaped backslash
    // followed by a plain "Z") untouched.
    jsRegex = jsRegex.replace(/\\([\s\S])/g, (escapeSequence, escapedChar) => {
        if (escapedChar === 'Z') {
            return '$';
        }

        if (escapedChar === 'A') {
            return '^';
        }

        return escapeSequence;
    });

    // Handling specific characters like 'ù', 'é' (in unicode)
    let isSpecificEnd = false;

    for (const [letter, unicodeValue] of Object.entries(SPECIFIC_LETTER_UNICODES)) {
        jsRegex = jsRegex.replaceAll(new RegExp(letter, 'g'), unicodeValue);
        // NOTE(review): the pattern below is an identity escape of `unicodeValue`, so it
        // matches the value itself rather than a backslash followed by it — confirm intent.
        jsRegex = jsRegex.replaceAll(new RegExp(`\\${unicodeValue}`, 'g'), unicodeValue);

        // Plain substring checks: a character class such as `[x|y]` would also match a
        // literal "|" and flag every pattern that merely uses alternation.
        if (jsRegex.includes(letter) || jsRegex.includes(unicodeValue)) {
            isSpecificEnd = true;
        }
    }

    return { backendRegexString: jsRegex, isSpecificEnd };
};

/**
 * Runs the backend-provided regex of a keyword item against already lowercased text.
 *
 * @param {string} [lowercasedText=''] - Text to search in.
 * @param {{ regex?: string }} [keywordItem={}] - Item whose `regex` holds the pattern source.
 * @param {boolean} [getAllMatches=true] - `true`: return `matchAll` match objects
 *     (with `index`); `false`: return the plain matched strings, as
 *     `String.prototype.match` with the `g` flag does.
 * @returns {Array|null} The matches (possibly empty), or `null` when the item has no
 *     usable pattern or the pattern cannot be compiled/executed.
 */
const getBackendRegexMatches = (lowercasedText = '', keywordItem = {}, getAllMatches = true) => {
    if (!keywordItem || !lowercasedText.length) {
        return [];
    }

    // A missing or empty pattern would compile to /(?:)/ and "match" at every position,
    // so report it the same way as an unusable pattern.
    if (!keywordItem.regex) {
        return null;
    }

    // There is try-catch block to fix broken regex expression
    try {
        const { backendRegexString, isSpecificEnd } = convertPythonRegexToJS(keywordItem.regex);
        const changedRegex = new RegExp(backendRegexString, 'gi');

        let trimMatchText = (matchText) => matchText;

        if (isSpecificEnd) {
            // Built once per call; values are included so a letter is recognised in
            // whichever of the two forms the constant maps between.
            const specificLetters = new Set([
                ...Object.keys(SPECIFIC_LETTER_UNICODES),
                ...Object.values(SPECIFIC_LETTER_UNICODES),
            ]);

            // Do not highlight: brackets, dots, commas, apostrophe, etc. at the very end.
            trimMatchText = (matchText) => {
                if (!matchText) {
                    return matchText;
                }

                const lastSymbol = matchText[matchText.length - 1];
                const isLetterLike =
                    /[\wа-я]/.test(lastSymbol) || specificLetters.has(lastSymbol);

                return isLetterLike ? matchText : matchText.slice(0, -1);
            };
        }

        if (!getAllMatches) {
            // `match` yields null (not an empty array) when nothing matched.
            const matchedStrings = lowercasedText.match(changedRegex) ?? [];

            return matchedStrings.map(trimMatchText);
        }

        const matches = Array.from(lowercasedText.matchAll(changedRegex));

        for (const match of matches) {
            match[0] = trimMatchText(match[0]);
        }

        return matches;
    } catch {
        console.warn('Regex exception:', keywordItem.regex);

        return null;
    }
};

/**
 * Collects every occurrence of a keyword item inside `text` (case-insensitively, by
 * searching the lowercased text).
 *
 * The backend regex is tried first through `getBackendRegexMatches`. Only when that
 * reports `null` does the fallback run: the literal keyword, the item's own `regex`
 * and the pattern produced by `adjustPatternForBrackets` are each searched, and the
 * combined result is de-duplicated with `getUniqueElements`.
 *
 * @param {string} [text=''] - Text to search in.
 * @param {{ keyword?: string, regex?: (string|RegExp) }} [keywordItem={}] - Keyword item.
 * @returns {Array} Match objects (each exposing `[0]` and `index`); empty when nothing
 *     is found or the input is empty.
 */
export const collectAllKeywordMatches = (text = '', keywordItem = {}) => {
    if (!keywordItem || !text.length) {
        return [];
    }

    const lowercasedText = text.toLocaleLowerCase();
    const backendRegexMatches = getBackendRegexMatches(lowercasedText, keywordItem, true);

    if (backendRegexMatches !== null) {
        return backendRegexMatches;
    }

    const { keyword, regex } = keywordItem;

    // Runs a single fallback pattern. The fallback is reached precisely when the backend
    // pattern is unusable, so a pattern that is missing or fails to compile contributes
    // no matches instead of crashing the caller.
    const collectMatchesSafely = (buildPattern) => {
        try {
            const pattern = buildPattern();

            // A nullish/empty pattern would compile to /(?:)/ and match at every position.
            if (pattern === undefined || pattern === null || pattern === '') {
                return [];
            }

            let globalPattern;

            if (pattern instanceof RegExp) {
                // `matchAll` rejects non-global regexes; rebuilding also resets `lastIndex`.
                const flags = pattern.global ? pattern.flags : `${pattern.flags}g`;
                globalPattern = new RegExp(pattern.source, flags);
            } else {
                globalPattern = new RegExp(pattern, 'g');
            }

            return Array.from(lowercasedText.matchAll(globalPattern));
        } catch (error) {
            console.warn('Fallback keyword pattern skipped:', error);

            return [];
        }
    };

    // The keyword is plain text, so regex metacharacters ("c++", "a.b") must be escaped.
    const matchedKeywordsArray = collectMatchesSafely(() =>
        typeof keyword === 'string' && keyword.length
            ? keyword.toLocaleLowerCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
            : null
    );
    const originalMatches = collectMatchesSafely(() => regex);
    const additionalMatches = collectMatchesSafely(() =>
        adjustPatternForBrackets(keyword, regex)
    );

    return getUniqueElements([...matchedKeywordsArray, ...originalMatches, ...additionalMatches]);
};

/**
 * Tells whether a keyword item occurs in `text` (searching the lowercased text).
 *
 * The backend regex is consulted first through `getBackendRegexMatches`; its answer is
 * final unless it reports `null`. In that case a direct keyword search is done with the
 * patterns produced by `getWordBoundaryRegex` and `adjustPatternForBrackets`.
 *
 * @param {string} [text=''] - Text to search in.
 * @param {{ keyword?: string, regex?: string }} [keywordItem={}] - Keyword item.
 * @returns {boolean} `true` when at least one occurrence is found; `false` otherwise,
 *     including for empty text or an item that has neither a usable regex nor a keyword.
 */
export const isKeywordFoundInText = (text = '', keywordItem = {}) => {
    if (!keywordItem || !text.length) {
        return false;
    }

    const lowercasedText = text.toLocaleLowerCase();
    const backendRegexMatches = getBackendRegexMatches(lowercasedText, keywordItem, true);

    if (backendRegexMatches !== null) {
        return backendRegexMatches.length > 0;
    }

    // The keyword is only needed from here on; without one there is nothing left to try.
    if (typeof keywordItem.keyword !== 'string') {
        return false;
    }

    // NOTE(review): presumably maps a visually identical variant of "à" to U+00E0 — confirm.
    const lowercasedKeyword = keywordItem.keyword.toLocaleLowerCase().replaceAll('à', '\u00E0');

    // Only direct keyword matches for corner cases not covered yet by backend regex patterns,
    // like: shelf -> shelves. The code below should be deprecated in the future.
    const keywordPattern = getWordBoundaryRegex(lowercasedKeyword);
    const additionalPattern = adjustPatternForBrackets(lowercasedKeyword, keywordPattern);

    // `test` on a global/sticky regex resumes from `lastIndex`, so always start over.
    const isPatternFound = (pattern) => {
        pattern.lastIndex = 0;

        return pattern.test(lowercasedText);
    };

    if (keywordPattern === additionalPattern) {
        return isPatternFound(keywordPattern);
    }

    return isPatternFound(keywordPattern) || isPatternFound(additionalPattern);
};

/**
 * Cleans up a raw keyword string.
 *
 * Steps, in order:
 *  1. runs of underscores collapse to one `_`, runs of hyphens to one `-`;
 *  2. runs of two or more backslashes or slashes, and every `, @ < > * # % ^ ? { } [ ] ( )`
 *     character, are removed;
 *  3. slashes, spaces, underscores, hyphens and non-breaking spaces are stripped from
 *     both ends;
 *  4. the result is trimmed and inner whitespace runs collapse to a single space.
 *
 * @param {string} keyword - Raw keyword text.
 * @returns {string} The normalized keyword.
 */
export const normalizeKeyword = (keyword) => {
    const withSingleUnderscores = keyword.replace(/_{2,}/g, "_");
    const withSingleHyphens = withSingleUnderscores.replace(/-{2,}/g, "-");
    // eslint-disable-next-line
    const withoutForbiddenSymbols = withSingleHyphens.replace(/(\\{2,}|\/{2,}|[,@<>*#%^?{}\[\]()])/g, "");
    // eslint-disable-next-line
    const withoutEdgeSeparators = withoutForbiddenSymbols.replace(/^[\/ _\-\u00A0]+|[\/ _\-\u00A0]+$/g, "");

    return withoutEdgeSeparators.trim().replace(/\s+/g, ' ');
};
