File size: 2,746 Bytes
652002d
d5865e4
652002d
572479d
 
 
 
edb351b
 
652002d
041fb15
 
 
 
 
 
 
 
 
 
652002d
 
 
 
 
 
810abe0
652002d
 
 
 
 
 
 
 
 
 
041fb15
 
652002d
d5865e4
652002d
 
 
 
 
 
 
 
 
 
 
 
 
 
041fb15
 
 
 
 
 
 
83a985f
652002d
 
 
0b59869
1ab5f2b
810abe0
7f66da9
810abe0
041fb15
652002d
 
 
 
9e503bc
 
 
 
 
 
652002d
 
 
9e503bc
652002d
9e503bc
edb351b
652002d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import pickle
import random

import time
import datetime
from pytz import timezone

import gradio as gr

def _tokenize_lyrics(lyric):
    """Return the distinct lowercase letters-only words of *lyric*, in first-seen order.

    The text is split on newlines and then on single spaces. Every
    non-alphabetic character is stripped from each token, so a token with no
    letters at all becomes the empty string (and is kept, once).
    """
    words = []
    for line in lyric.lower().split('\n'):
        for token in line.split(' '):
            words.append(''.join(filter(str.isalpha, token)))
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(words))


def _match_score(query_words, query_set, entry_words):
    """Return the mean of the word-overlap ratio and the positional-match ratio.

    Both ratios are normalised by the length of the *shorter* side. A side
    with no words yields a ratio of 0.0 instead of dividing by zero.
    """
    entry_set = set(entry_words)

    # min() applied directly to two sets/lists compares them by subset or
    # lexicographic order, not by size, so the lengths are compared explicitly.
    overlap_denominator = min(len(query_set), len(entry_set))
    if overlap_denominator:
        overlap_ratio = len(query_set & entry_set) / overlap_denominator
    else:
        overlap_ratio = 0.0

    positional_denominator = min(len(query_words), len(entry_words))
    if positional_denominator:
        aligned = sum(1 for a, b in zip(query_words, entry_words) if a == b)
        positional_ratio = aligned / positional_denominator
    else:
        positional_ratio = 0.0

    return (overlap_ratio + positional_ratio) / 2


def classify_lyrics(lyric):
    """Find the entry of the loaded lyrics dataset that best matches *lyric*.

    Reads two module-level globals that must be set before the first call:
    ``PDT`` (timezone used for the request timestamp log line) and
    ``lyrics_set_final`` (the dataset). Each dataset entry is indexed as
    ``entry[0]``, ``entry[1]`` (reported as title/artist and genre) and
    ``entry[2]`` (the entry's word sequence that the query is compared to).

    Args:
        lyric: Free-form lyrics text; lines separated by newlines.

    Returns:
        A ``(match_score, entry[0], entry[1])`` tuple for the best-scoring
        entry. Ties for the best score are broken uniformly at random. If the
        dataset is empty, ``(0.0, '', '')`` is returned.
    """
    divider = '=' * 70

    print(divider)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))

    print(divider)
    print('Req string:', lyric)

    print(divider)
    print('Preparing to search...')

    query_words = _tokenize_lyrics(lyric)
    query_set = set(query_words)

    print('Searching titles...Please wait...')

    scores = [
        _match_score(query_words, query_set, entry[2])
        for entry in lyrics_set_final
    ]

    if not scores:
        # Nothing to match against; report that instead of raising.
        print('Done!')
        print(divider)
        print('No lyrics entries are loaded; nothing to match against.')
        print(divider)
        return 0.0, '', ''

    best_score = max(scores)

    # Pick randomly among equally good entries. This gives the same tie
    # distribution as shuffling the dataset first, without mutating the
    # shared global list on every request.
    tied_indices = [i for i, score in enumerate(scores) if score == best_score]
    result = lyrics_set_final[random.choice(tied_indices)][:2]

    print('Done!')
    print(divider)
    print('Search match ratio:', best_score)
    print('Found title/artist and genre:',result[0], "---", result[1])
    print(divider)

    return best_score, result[0], result[1]

# Gradio UI: one lyrics text box in; match score, title/artist and genre out.
# The three outputs line up with the 3-tuple returned by classify_lyrics.
demo = gr.Interface(
    title="Algorithmic Lyrics Classifier",
    description="Algorithmic match lyrics classification by artist and genre",
    fn=classify_lyrics,
    inputs=[
        gr.Textbox(
            value="So close, no matter how far\nCouldn't be much more from the heart\nForever trusting who we are\nAnd nothing else matters",
            label="Enter any lyrics here",
        ),
    ],
    outputs=[
        gr.Label(label="Match score"),
        gr.Textbox(label="Title/Artist"),
        gr.Textbox(label="Genre (if identified)"),
    ],
)

if __name__ == "__main__":

    # Timezone for log timestamps; classify_lyrics reads this module global.
    PDT = timezone('US/Pacific')

    divider = '=' * 70
    app_started_at = datetime.datetime.now(PDT)

    print(divider)
    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(app_started_at))
    print(divider)

    # Load the lyrics dataset into the module global that classify_lyrics
    # searches. NOTE(review): pickle.load executes arbitrary code from the
    # file, so this file must come from a trusted source.
    print('Loading data...')
    with open('English_Lyrics_Ordered_Sets_Small_1358353.pickle', 'rb') as dataset_file:
        lyrics_set_final = pickle.load(dataset_file)
    print(divider)
    print('Done!')
    print(divider)

    # Start the Gradio app.
    demo.launch()