import spacy
import pickle
from nltk.corpus import wordnet


def load_spacy_values(model = "en_core_web_md", filepath_docs_spacy = 'dict_spacy_object.pkl'):
    '''
    Loads a spaCy language model and a dictionary of spaCy Doc objects from a pickle file.

    Parameters
    ----------
    model : str
        The name or local path of the spaCy model to be loaded for processing text. 
        For example, "en_core_web_sm" or a custom model path.

    filepath_docs_spacy : str
        The path to the pickle file containing a dictionary where the keys are tokens 
        (strings) and the values are the corresponding serialized spaCy Doc objects.

    Returns
    -------
    nlp : spacy.language.Language
        The loaded spaCy language model.

    dict_docs_spacy : dict
        A dictionary where the keys are tokens (strings) and the values are spaCy Doc 
        objects reconstructed from the serialized bytes stored in the pickle file.
    '''
    
    # ---- Load the spaCy NLP model
    #
    nlp = spacy.load(model)
    
    # ---- Load pickle file and reconstruct the dictionary with tokens as keys and spaCy Doc objects as values
    #
    with open(filepath_docs_spacy, 'rb') as file:
        dict_docs_spacy_bytes = pickle.load(file)
    
    dict_docs_spacy = {key: spacy.tokens.Doc(nlp.vocab).from_bytes(doc_bytes) for key, doc_bytes in dict_docs_spacy_bytes.items()}
    
    return nlp, dict_docs_spacy


def find_antonyms(word):
    '''
    Generate a set of all the antonyms of a given word

    Parameters
    ----------
    word : str
        The word that we want to find the antonyms

    Returns
    -------
    antonyms : set of str
        A set of all the antonym detected using nltk and WordNet
    '''
    
    antonyms = set()

    # ---- Load all the set of synonyms of the word recorded from wordnet
    #
    syn_set = wordnet.synsets(word)

    # ---- Loop over each set of synonyms
    #
    for syn in syn_set:
        # ---- Loop over each synonym
        #
        for lemma in syn.lemmas():
            # ---- Add antonyms of the synonyms to the antonyms set
            #
            if lemma.antonyms():
                antonyms.add(lemma.antonyms()[0].name())

    return antonyms


def find_synonyms(word, model, dict_embedding, list_2000_tokens):
    '''
    Finds the most similar token to a given word.

    Parameters
    ----------
    word : str
        The word that we want to find the most similar word

    model : spacy.language.Language
        spaCy language model to use for the detection of the synonym
    
    dict_embedding: dict
        A dictionary where the keys are tokens (str) and the values are spaCy Doc objects

    list_2000_tokens : list of str
        A list of 2000 tokens against which the gloss will be checked.
    
    Returns
    -------
    most_similar_token : str
        The most similar token to the given word 
    '''

    # ---- Skip synonym detection if the word is already in the list_2000_token
    #
    if word in list_2000_tokens:
        return word
    else:
        # ---- Remove antonyms of the given word of the list_2000_tokens (a word and an antonym might be similar in embedding representation)
        #
        antonyms = find_antonyms(word)
        list_2000_tokens_less_antonyms = [token for token in list_2000_tokens if token not in antonyms]

        # ---- Generate a list of tuple (token, similarities values between the embedding of the given word and the embedding of each token of the list_2000_tokens)
        #
        word_embedding = model(word)
        similarities=[]
    
        for token in list_2000_tokens_less_antonyms:
            similarities.append((token, dict_embedding.get(token).similarity(word_embedding)))

        # ---- Extract the most similar token of the list
        #
        most_similar_token = sorted(similarities, key=lambda item: -item[1])[0][0]

        return most_similar_token