import pickle
import random

import gradio as gr

def _tokenize_lyric(lyric):
    """Return the lyric's distinct words in first-occurrence order.

    The text is lower-cased, split on newlines and then on single spaces,
    and every non-alphabetic character is stripped from each token.
    """
    # NOTE(review): tokens that contain no letters (or that come from
    # consecutive spaces) collapse to '' and are kept on purpose; the dataset
    # was presumably built with the same tokenization -- confirm before
    # filtering them out.
    words = [
        ''.join(filter(str.isalpha, word))
        for line in lyric.lower().split('\n')
        for word in line.split(' ')
    ]
    return list(dict.fromkeys(words))


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def classify_lyrics(lyric):
    """Find the dataset entry whose word list best matches the given lyric.

    Each entry of the module-level ``lyrics_set_final`` is scored as the mean
    of two ratios, both normalized by the shorter of the two word collections:

    * the share of distinct words the query and the entry have in common;
    * the share of positions at which the query's ordered words equal the
      entry's ordered words.

    Args:
        lyric: Free-form lyric text typed by the user.

    Returns:
        A tuple ``(score, first, second)`` where ``score`` is the best match
        score and ``first``/``second`` are the first two fields of the
        winning entry (presumably artist and title -- confirm against the
        dataset).

    Raises:
        IndexError: If the dataset contains no entries.
    """
    query_words = _tokenize_lyric(lyric)
    query_set = set(query_words)

    if not lyrics_set_final:
        raise IndexError('lyrics dataset is empty')

    scores = []
    for entry in lyrics_set_final:
        entry_words = entry[2]
        entry_set = set(entry_words)

        # Normalize by the *length* of the shorter collection. Calling min()
        # on the collections themselves compares sets by subset order and
        # lists lexicographically, which picks an arbitrary denominator.
        overlap_ratio = _safe_ratio(
            len(query_set & entry_set),
            min(len(query_set), len(entry_set)),
        )

        aligned_count = sum(a == b for a, b in zip(query_words, entry_words))
        aligned_ratio = _safe_ratio(
            aligned_count,
            min(len(query_words), len(entry_words)),
        )

        scores.append((overlap_ratio + aligned_ratio) / 2)

    best_score = max(scores)
    tied_indices = [i for i, score in enumerate(scores) if score == best_score]

    # Break ties uniformly at random without shuffling the shared dataset:
    # an in-place shuffle on every call is O(n) extra work and mutates a
    # global that other in-flight requests may be iterating over.
    best_index = random.choice(tied_indices)

    result = lyrics_set_final[best_index][:2]

    print(result, best_index, len(tied_indices))

    return best_score, result[0], result[1]

# Gradio UI wiring: a single text input feeds classify_lyrics, whose three
# return values are rendered as one label followed by two text outputs.
demo = gr.Interface(fn=classify_lyrics, inputs=["text"], outputs=["label", "text", "text"])

if __name__ == "__main__":
    # Load the dataset into the module-level name that classify_lyrics reads,
    # then start the web UI.
    # NOTE: pickle.load can execute arbitrary code embedded in the file, so
    # this pickle must come from a trusted source.
    print('Loading data...')
    with open('English_Lyrics_Ordered_Sets_Small_1358353.pickle', 'rb') as dataset_file:
        lyrics_set_final = pickle.load(dataset_file)
    print('Done!')

    demo.launch()