|
import pickle |
|
import random |
|
|
|
import time |
|
import datetime |
|
from pytz import timezone |
|
|
|
import gradio as gr |
|
|
|
def _unique_query_words(lyric):
    """Return the lower-cased, letters-only words of *lyric*, de-duplicated in order."""
    words = []
    for line in lyric.lower().split('\n'):
        for token in line.split(' '):
            # Keep alphabetic characters only; a token without any letters
            # becomes '' and is kept, exactly as the original tokenizer did.
            words.append(''.join(filter(str.isalpha, token)))
    # dict.fromkeys drops repeats while preserving first-seen order.
    return list(dict.fromkeys(words))


def classify_lyrics(lyric):
    """Find the entry of the global ``lyrics_set_final`` that best matches *lyric*.

    Each entry is scored as the mean of two ratios:

    * shared words: number of distinct words common to the query and the
      entry, divided by the size of the smaller of the two word sets;
    * same-position words: number of positions at which the query word list
      and the entry word list hold the same word, divided by the length of
      the shorter list.

    Relies on two module-level globals that must be set before the first call:
    ``PDT`` (the tzinfo used for the logged timestamps) and
    ``lyrics_set_final`` (a list of entries whose item 0 is printed as the
    title/artist, item 1 as the genre, and item 2 is the word sequence that
    is compared against the query).

    Args:
        lyric: Raw lyrics text; lines are split on newlines, words on spaces.

    Returns:
        A ``(match_ratio, title_artist, genre)`` tuple for the best-scoring
        entry, or ``(0.0, '', '')`` when ``lyrics_set_final`` is empty.
    """
    separator = '=' * 70

    print(separator)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    start_time = time.time()

    print(separator)
    print('Req string:', lyric)

    print(separator)
    print('Preparing to search...')

    query_words = _unique_query_words(lyric)
    query_set = set(query_words)
    query_word_count = len(query_words)
    query_set_size = len(query_set)

    # In-place shuffle of the shared list: the first entry reaching the top
    # score wins below, so this changes which one is reported on a tie.
    random.shuffle(lyrics_set_final)

    print('Searching titles...Please wait...')

    best_ratio = 0.0
    best_entry = None

    for entry in lyrics_set_final:
        text_words = entry[2]
        text_set = set(text_words)

        # Denominators are the sizes of the SMALLER collection. Note that
        # len(min(a, b)) would be wrong here: min() orders sets by the subset
        # relation and lists lexicographically, not by length.
        shared_denominator = min(query_set_size, len(text_set))
        positional_denominator = min(query_word_count, len(text_words))

        if shared_denominator and positional_denominator:
            shared_ratio = len(query_set & text_set) / shared_denominator
            positional_hits = sum(a == b for a, b in zip(query_words, text_words))
            positional_ratio = positional_hits / positional_denominator
            ratio = (shared_ratio + positional_ratio) / 2
        else:
            # An entry without words cannot match anything; avoid dividing by zero.
            ratio = 0.0

        # Strict '>' keeps the first entry that reaches the maximum.
        if best_entry is None or ratio > best_ratio:
            best_ratio = ratio
            best_entry = entry

    if best_entry is None:
        # Nothing to search: report an empty result instead of raising.
        title_artist, genre = '', ''
    else:
        title_artist, genre = best_entry[0], best_entry[1]

    print('Done!')
    print(separator)
    print('Search match ratio:', best_ratio)
    print('Found title/artist and genre:', title_artist, "---", genre)
    print(separator)

    print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print('-' * 70)
    print('Req execution time:', (time.time() - start_time), 'sec')
    print(separator)

    return best_ratio, title_artist, genre
|
|
|
# Gradio UI definition: one lyrics textbox as input; the three values returned
# by classify_lyrics (score, title/artist, genre) are shown as the outputs.
_default_lyrics = "So close, no matter how far\nCouldn't be much more from the heart\nForever trusting who we are\nAnd nothing else matters"

demo = gr.Interface(
    fn=classify_lyrics,
    inputs=[
        gr.Textbox(label="Enter any lyrics here", value=_default_lyrics),
    ],
    outputs=[
        gr.Label(label="Match score"),
        gr.Textbox(label="Title/Artist"),
        gr.Textbox(label="Genre (if identified)"),
    ],
    title="Algorithmic Lyrics Classifier",
    description="Algorithmic match lyrics classification by artist and genre",
)
|
|
|
if __name__ == "__main__":
    # Script entry point: define the globals that classify_lyrics reads
    # (PDT and lyrics_set_final), then start the Gradio app.
    separator = '=' * 70

    # Timezone used when formatting the logged timestamps.
    PDT = timezone('US/Pacific')

    print(separator)
    app_started_at = datetime.datetime.now(PDT)
    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(app_started_at))
    print(separator)

    print('Loading data...')
    # NOTE: unpickling can execute arbitrary code; only load a trusted dataset file.
    with open('English_Lyrics_Ordered_Sets_Small_1358353.pickle', 'rb') as dataset_file:
        lyrics_set_final = pickle.load(dataset_file)
    print(separator)
    print('Done!')
    print(separator)

    demo.launch()