Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
from huggingface_hub import hf_hub_url, cached_download
|
6 |
+
from gensim.models.fasttext import load_facebook_model
|
7 |
+
|
8 |
+
# Resolve the model file on the Hugging Face Hub and fetch it exactly once.
# The value returned by ``cached_download`` is handed to gensim as the file to
# load, so there is no need for a separate warm-up download call.
url = hf_hub_url(repo_id="simonschoe/call2vec", filename="model.bin")
model_path = cached_download(url)

# load model via gensim
model = load_facebook_model(model_path)
|
14 |
+
|
15 |
+
def process(_input, topn, similar):
    """Return the tokens most similar (or dissimilar) to the seed term(s).

    Args:
        _input: Raw textbox content with one seed term per line. Each term is
            stripped, lower-cased and has its inner spaces replaced by
            underscores; blank lines are ignored.
        topn: Number of neighbours to return; coerced to ``int``.
        similar: ``'Dissimilar'`` uses the seed as a negative example; any
            other value uses it as a positive example.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Token``,
        ``Cosine Similarity`` and ``Frequency``, one row per neighbour. The
        frame is empty when ``_input`` contains no non-blank line.
    """
    columns = ['Token', 'Cosine Similarity', 'Frequency']

    # One seed per line. Stripping and dropping blanks keeps a trailing
    # newline or stray space from turning into a bogus seed token that would
    # otherwise be averaged into the query vector.
    seeds = [
        line.strip().lower().replace(' ', '_')
        for line in (_input or '').splitlines()
    ]
    seeds = [seed for seed in seeds if seed]
    if not seeds:
        return pd.DataFrame(columns=columns)

    if len(seeds) > 1:
        # compute average seed embedding
        query = np.stack([model.wv[seed] for seed in seeds], axis=0).mean(axis=0)
    else:
        # A single seed is passed by key rather than as a vector.
        query = seeds[0]

    # find (dis)similarities; the seed is passed as a one-element list, which
    # is the documented argument form for ``most_similar``.
    role = 'negative' if similar == 'Dissimilar' else 'positive'
    nearest_neighbors = model.wv.most_similar(topn=int(topn), **{role: [query]})

    rows = [
        (token, score, model.wv.get_vecattr(token, 'count'))
        for token, score in nearest_neighbors
    ]
    return pd.DataFrame(rows, columns=columns)
|
41 |
+
|
42 |
+
def save(df):
    """Write ``df`` to ``result.csv`` in the working directory.

    Args:
        df: Object exposing ``to_csv`` (the results table shown in the UI).

    Returns:
        The relative path of the written file, ``'result.csv'``.
    """
    csv_path = 'result.csv'
    df.to_csv(csv_path)
    return csv_path
|
45 |
+
|
46 |
+
# Build the Gradio interface: query controls on the left, a "Single" and a
# "Multiple" seed tab in the middle, and the project description on the right.
demo = gr.Blocks(theme="dark")

with demo:
    gr.Markdown("# Title")
    gr.Markdown("## Subtitle")
    with gr.Row():
        # Controls shared by both tabs.
        with gr.Column():
            similar_radio = gr.Radio(choices=["Similar", "Dissimilar"])
            n_output = gr.Slider(minimum=5, maximum=50, step=1)
            # Markdown text is kept flush-left so that no line is indented
            # far enough to be rendered as a code block.
            gr.Markdown(
                """### Example prompts:
- Example 1
- Example 2
"""
            )
        with gr.Column():
            with gr.Tabs():
                # Single seed term.
                with gr.TabItem("Single"):
                    with gr.Column():
                        text_input = gr.Textbox(lines=1)
                        df_output = gr.Dataframe(interactive=False)
                        with gr.Row():
                            compute_button_s = gr.Button("Compute")
                            export_button_s = gr.Button("Export as CSV")
                        file_out_s = gr.File(interactive=False)
                # Several seed terms, one per line.
                with gr.TabItem("Multiple"):
                    with gr.Column():
                        text_input_multiple = gr.Textbox(lines=3)
                        df_output_multiple = gr.Dataframe(interactive=False)
                        with gr.Row():
                            compute_button_m = gr.Button("Compute")
                            export_button_m = gr.Button("Export as CSV")
                        file_out_m = gr.File(interactive=False)
        with gr.Column():
            gr.Markdown("""
### Project Description
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.""")

    # Each tab's "Compute" button fills that tab's own results table.
    compute_button_s.click(process, inputs=[text_input, n_output, similar_radio], outputs=df_output)
    compute_button_m.click(process, inputs=[text_input_multiple, n_output, similar_radio], outputs=df_output_multiple)

    # Each tab's "Export" button writes that tab's table and exposes the file
    # in that tab's own download slot. The "Multiple" export was previously
    # bound to the "Single" button and file slot, leaving its own button inert.
    export_button_s.click(save, inputs=[df_output], outputs=file_out_s)
    export_button_m.click(save, inputs=[df_output_multiple], outputs=file_out_m)

demo.launch()
|