Commit
·
77196ea
1
Parent(s):
d513e74
update sts
Browse files
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🏢
|
|
4 |
colorFrom: green
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
4 |
colorFrom: green
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.23.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
CHANGED
@@ -73,6 +73,9 @@ with gr.Blocks() as demo:
|
|
73 |
|
74 |
with gr.Row():
|
75 |
with gr.Column():
|
|
|
|
|
|
|
76 |
score_sts = gr.Number(label="score", value=0.96, interactive=True)
|
77 |
submit_button_sts = gr.Button("Submit", variant="primary")
|
78 |
|
@@ -85,7 +88,7 @@ with gr.Blocks() as demo:
|
|
85 |
|
86 |
submit_button_sts.click(
|
87 |
fn=sts,
|
88 |
-
inputs=[upload_button_sentences1, upload_button_sentences2, score_sts],
|
89 |
outputs=output_sts
|
90 |
)
|
91 |
|
|
|
73 |
|
74 |
with gr.Row():
|
75 |
with gr.Column():
|
76 |
+
model = gr.Dropdown(
|
77 |
+
["Lajavaness/bilingual-embedding-large", "sentence-transformers/all-mpnet-base-v2",
|
78 |
+
"intfloat/multilingual-e5-large-instruct"], label="model", interactive=True)
|
79 |
score_sts = gr.Number(label="score", value=0.96, interactive=True)
|
80 |
submit_button_sts = gr.Button("Submit", variant="primary")
|
81 |
|
|
|
88 |
|
89 |
submit_button_sts.click(
|
90 |
fn=sts,
|
91 |
+
inputs=[model, upload_button_sentences1, upload_button_sentences2, score_sts],
|
92 |
outputs=output_sts
|
93 |
)
|
94 |
|
sts.py
CHANGED
@@ -5,20 +5,19 @@ import torch
|
|
5 |
from datasets import Dataset
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
|
8 |
-
def sts(data1, data2, score):
|
|
|
9 |
st = time.time()
|
10 |
|
11 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
12 |
model = SentenceTransformer(
|
13 |
-
|
14 |
-
backend="openvino",
|
15 |
-
model_kwargs={"file_name": "openvino/openvino_model.xml"},
|
16 |
device=device,
|
17 |
trust_remote_code=True,
|
18 |
)
|
19 |
|
20 |
-
sentences1 = Dataset.from_pandas(pd.read_csv(data1, on_bad_lines='skip', header=0,
|
21 |
-
sentences2 = Dataset.from_pandas(pd.read_csv(data2, on_bad_lines='skip', header=0,
|
22 |
|
23 |
embeddings1 = model.encode(sentences1["text"], normalize_embeddings=True, batch_size=1024,
|
24 |
show_progress_bar=True)
|
@@ -45,12 +44,12 @@ def sts(data1, data2, score):
|
|
45 |
.join(df_sentences2, on="column_index"))
|
46 |
|
47 |
df_long = df_long.rename({
|
48 |
-
"text": "
|
49 |
"text_right": "sentences2",
|
50 |
-
})
|
51 |
|
52 |
elapsed_time = time.time() - st
|
53 |
print('Execution time:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
|
54 |
|
55 |
-
return df_long.
|
56 |
-
|
|
|
5 |
from datasets import Dataset
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
|
8 |
+
|
9 |
+
def sts(modelname, data1, data2, score):
|
10 |
st = time.time()
|
11 |
|
12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
13 |
model = SentenceTransformer(
|
14 |
+
modelname,
|
|
|
|
|
15 |
device=device,
|
16 |
trust_remote_code=True,
|
17 |
)
|
18 |
|
19 |
+
sentences1 = Dataset.from_pandas(pd.read_csv(data1, on_bad_lines='skip', header=0, sep="\t"))
|
20 |
+
sentences2 = Dataset.from_pandas(pd.read_csv(data2, on_bad_lines='skip', header=0, sep="\t"))
|
21 |
|
22 |
embeddings1 = model.encode(sentences1["text"], normalize_embeddings=True, batch_size=1024,
|
23 |
show_progress_bar=True)
|
|
|
44 |
.join(df_sentences2, on="column_index"))
|
45 |
|
46 |
df_long = df_long.rename({
|
47 |
+
"text": "sentences1",
|
48 |
"text_right": "sentences2",
|
49 |
+
}).drop(["row_index", "column_index"])
|
50 |
|
51 |
elapsed_time = time.time() - st
|
52 |
print('Execution time:', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
|
53 |
|
54 |
+
return df_long.filter(pl.col("score") > score).sort(["score"],
|
55 |
+
descending=True)
|
utils.py
CHANGED
@@ -6,7 +6,7 @@ import polars as pl
|
|
6 |
import time
|
7 |
|
8 |
def getDataFrame(path):
|
9 |
-
data = pd.read_csv(path, on_bad_lines='skip', header=0,
|
10 |
return pl.from_pandas(data)
|
11 |
|
12 |
def save_to_csv(dataframe):
|
|
|
6 |
import time
|
7 |
|
8 |
def getDataFrame(path):
|
9 |
+
data = pd.read_csv(path, on_bad_lines='skip', header=0, sep="\t")
|
10 |
return pl.from_pandas(data)
|
11 |
|
12 |
def save_to_csv(dataframe):
|