Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,14 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
2 |
from sentence_transformers.util import cos_sim
|
3 |
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
@st.cache
|
6 |
def load_model():
|
@@ -8,6 +16,12 @@ def load_model():
|
|
8 |
model.eval()
|
9 |
return model
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
st.title("Sentence Embedding for Spanish with Bertin")
|
12 |
st.write("Sentence embedding for spanish trained on NLI. Used for Sentence Textual Similarity. Based on the model hackathon-pln-es/bertin-roberta-base-finetuning-esnli.")
|
13 |
|
@@ -20,6 +34,41 @@ if st.button('Compute similarity'):
|
|
20 |
encodings = model.encode([sent1, sent2])
|
21 |
sim = cos_sim(encodings[0], encodings[1]).numpy().tolist()[0][0]
|
22 |
st.text('Cosine Similarity: {0:.4f}'.format(sim))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
else:
|
24 |
st.write('Missing a sentences')
|
25 |
else:
|
|
|
1 |
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
from sentence_transformers.util import cos_sim
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
+
from bokeh.plotting import figure, output_notebook, show, save
|
8 |
+
from bokeh.io import output_file, show
|
9 |
+
from bokeh.models import ColumnDataSource, HoverTool
|
10 |
+
from sklearn.manifold import TSNE
|
11 |
+
|
12 |
|
13 |
@st.cache
|
14 |
def load_model():
|
|
|
16 |
model.eval()
|
17 |
return model
|
18 |
|
19 |
+
@st.cache
def load_plot_data():
    """Load the reference data used for the embedding plot.

    Reads the embedding array from ``semeval2015-embs.npy`` and the
    accompanying table from ``semeval2015-data.csv``; both paths are relative
    to the current working directory. The function is wrapped in ``st.cache``
    like ``load_model``.

    Returns:
        A ``(embeddings, table)`` tuple: the array returned by ``np.load`` and
        the ``DataFrame`` returned by ``pd.read_csv``.
    """
    # NOTE(review): presumably row i of the array corresponds to row i of the
    # CSV -- confirm against the script that produced these files.
    reference_embeddings = np.load('semeval2015-embs.npy')
    reference_table = pd.read_csv('semeval2015-data.csv')
    return (reference_embeddings, reference_table)
|
24 |
+
|
25 |
st.title("Sentence Embedding for Spanish with Bertin")
|
26 |
st.write("Sentence embedding for spanish trained on NLI. Used for Sentence Textual Similarity. Based on the model hackathon-pln-es/bertin-roberta-base-finetuning-esnli.")
|
27 |
|
|
|
34 |
# Encode both input sentences in one batch and report their cosine similarity.
encodings = model.encode([sent1, sent2])
sim = cos_sim(encodings[0], encodings[1]).numpy().tolist()[0][0]
st.text('Cosine Similarity: {0:.4f}'.format(sim))

print('Generating visualization...')
sentembs, data = load_plot_data()

# Stack the two query embeddings as extra ROWS below the reference embeddings
# (axis=0), so t-SNE yields one 2-D point per sentence. Joining along axis=1
# raises unless both arrays have the same number of rows, and would not add
# any points for the query sentences.
X_embedded = TSNE(n_components=2, learning_rate='auto',
    init='random').fit_transform(np.concatenate([sentembs, encodings], axis=0))

# Add one table row per query sentence, in the same order as `encodings`, so
# the table length matches X_embedded. DataFrame.append returned a new frame
# (its result was being discarded) and no longer exists in pandas >= 2.0, so
# the extended table is built with pd.concat. This also leaves the frame
# returned by the cached loader unmodified.
# NOTE(review): assumes the CSV already has 'sent' and 'color' columns -- confirm.
data = pd.concat(
    [
        data,
        pd.DataFrame([
            {'sent': sent1, 'color': '#F0E442'},  # sentence 1
            {'sent': sent2, 'color': '#D55E00'},  # sentence 2
        ]),
    ],
    ignore_index=True,
)
data['x'] = X_embedded[:, 0]
data['y'] = X_embedded[:, 1]
|
47 |
+
|
48 |
+
# Wrap the prepared table so the glyph and the hover tool can refer to its
# columns by name.
points = ColumnDataSource(data)

plot = figure(title="Embeddings in space")

# One circle per table row, positioned by the 'x'/'y' columns and coloured by
# the 'color' column; the explicit line_color overrides the outline colour.
marker_options = dict(
    x='x',
    y='y',
    legend_label="Objects",
    color='color',
    fill_alpha=0.5,
    line_color="blue",
    size=14,
    source=points,
)
plot.circle(**marker_options)

# Show the 'sent' column of the row under the mouse pointer.
sentence_hover = HoverTool(
    tooltips=[('sent', '@sent')],
    formatters={'@sent': 'printf'},
    mode='mouse',
)
plot.add_tools(sentence_hover)

st.bokeh_chart(plot, use_container_width=True)
|
72 |
else:
|
73 |
st.write('Missing a sentences')
|
74 |
else:
|