"""Gradio demo that looks up the closest entry in a lyrics dataset for a query text."""

import pickle
import random

import gradio as gr

# Dataset searched by ``classify_lyrics``. Each entry is indexed as
# ``entry[0]`` and ``entry[1]`` (the two values returned to the UI) and
# ``entry[2]`` (the word sequence the query is compared against).
# It is populated from the pickle file when this module is run as a script;
# the empty default keeps the module importable without a NameError.
lyrics_set_final = []


def _tokenize(text):
    """Return the ordered, de-duplicated, letters-only tokens of *text*.

    The text is lower-cased, split on newlines and then on single spaces, and
    every non-alphabetic character is stripped from each token. A token that
    contained no letters becomes ``''`` and is kept (once).
    """
    # NOTE(review): empty tokens are deliberately kept so that positions stay
    # aligned with the dataset, which presumably was built with the same
    # procedure -- confirm against the dataset preprocessing.
    tokens = [
        "".join(filter(str.isalpha, word))
        for line in text.lower().split("\n")
        for word in line.split(" ")
    ]
    return list(dict.fromkeys(tokens))


def _match_score(query_words, query_set, text_words):
    """Return the mean of the set-overlap ratio and the positional-match ratio.

    Both ratios are normalised by the size of the *shorter* side, so the
    result lies in ``[0, 1]``. An empty side yields ``0.0`` instead of a
    division by zero.
    """
    text_set = set(text_words)
    set_denominator = min(len(query_set), len(text_set))
    seq_denominator = min(len(query_words), len(text_words))
    if not set_denominator or not seq_denominator:
        return 0.0

    overlap_ratio = len(query_set & text_set) / set_denominator
    # Count positions at which the query and the text hold the same word.
    positional_hits = sum(a == b for a, b in zip(query_words, text_words))
    positional_ratio = positional_hits / seq_denominator
    return (overlap_ratio + positional_ratio) / 2


def classify_lyrics(lyric):
    """Find the dataset entry whose words best match *lyric*.

    Args:
        lyric: Free-form query text; ``None`` is treated as an empty string.

    Returns:
        A ``(score, first, second)`` tuple where ``score`` is the best match
        score and ``first`` / ``second`` are elements 0 and 1 of the winning
        dataset entry. When the query contains no letters or the dataset is
        empty, ``(0.0, "", "")`` is returned.
    """
    query_words = _tokenize(lyric or "")
    if not any(query_words) or not lyrics_set_final:
        return 0.0, "", ""
    query_set = set(query_words)

    # Single pass that tracks the best score and every index that reaches it.
    best_score = -1.0
    best_indices = []
    for index, entry in enumerate(lyrics_set_final):
        score = _match_score(query_words, query_set, entry[2])
        if score > best_score:
            best_score = score
            best_indices = [index]
        elif score == best_score:
            best_indices.append(index)

    # Ties are broken uniformly at random. This replaces shuffling the whole
    # shared dataset on every request, which mutated global state.
    chosen = random.choice(best_indices)
    result = lyrics_set_final[chosen][:2]
    print(result, chosen, len(best_indices))
    return best_score, result[0], result[1]


demo = gr.Interface(
    fn=classify_lyrics,
    inputs=["text"],
    outputs=["label", "text", "text"],
)

if __name__ == "__main__":
    print('Loading data...')
    # SECURITY: unpickling can execute arbitrary code; only load this file
    # from a trusted source.
    with open('English_Lyrics_Ordered_Sets_Small_1358353.pickle', 'rb') as f:
        lyrics_set_final = pickle.load(f)
    print('Done!')
    demo.launch()