|
import multiprocessing |
|
import threading |
|
import gradio as gr |
|
from mining import mining |
|
from sts import sts |
|
from utils import getDataFrame, save_to_csv, delete_folder_periodically |
|
|
|
# Passed as `concurrency_limit` to every CSV-upload listener below.
CONCURRENCY_LIMIT = 5

# Embedding models offered in the "model" dropdown of both tabs.
MODEL_CHOICES = (
    "Lajavaness/bilingual-embedding-large",
    "sentence-transformers/all-mpnet-base-v2",
    "intfloat/multilingual-e5-large-instruct",
)

# Two-tab Gradio UI:
#   * "Paraphrase Mining"           - one uploaded CSV is handed to `mining`.
#   * "Semantic Textual Similarity" - two uploaded CSVs are handed to `sts`.
# Each tab previews the uploaded CSV(s) via `getDataFrame`, shows the result
# in a polars-typed Dataframe, and offers the result for download via
# `save_to_csv`.
#
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been stripped. Event listeners and the download controls
# are assumed to sit directly under their tab - confirm against the original
# layout.
with gr.Blocks() as demo:
    with gr.Tab("Paraphrase Mining"):
        with gr.Row():
            gr.Markdown(
                "### Paraphrase mining is the task of finding paraphrases (texts with identical / similar meaning) in a large corpus of sentences")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### sentences")

                upload_button_sentences = gr.UploadButton(
                    label="upload sentences csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences data")

                # Preview the uploaded CSV as soon as the upload completes.
                upload_button_sentences.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences,
                    outputs=output_data_sentences,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                model_mining = gr.Dropdown(
                    list(MODEL_CHOICES), label="model", interactive=True)
                # presumably a minimum similarity score - verify against `mining`
                score_mining = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_mining = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                output_mining = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Mining",
                )

        # `mining` receives (model, uploaded file, score) and fills the table.
        submit_button_mining.click(
            fn=mining,
            inputs=[model_mining, upload_button_sentences, score_mining],
            outputs=output_mining,
        )

        download_button_mining = gr.Button("Download Results as CSV", variant="huggingface")
        download_file_mining = gr.File(label="Downloadable File")

        # `save_to_csv` receives the result table; its return value feeds the File.
        download_button_mining.click(
            fn=save_to_csv,
            inputs=output_mining,
            outputs=download_file_mining,
        )

    with gr.Tab("Semantic Textual Similarity"):
        with gr.Row():
            gr.Markdown(
                "### Semantic Textual Similarity (STS), we want to produce embeddings for all texts involved and calculate the similarities between them")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### sentences 1")
                upload_button_sentences1 = gr.UploadButton(
                    label="upload sentences 1 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences1 = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences 1 data")

                upload_button_sentences1.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences1,
                    outputs=output_data_sentences1,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

            with gr.Column():
                gr.Markdown("#### sentences 2")
                upload_button_sentences2 = gr.UploadButton(
                    label="upload sentences 2 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences2 = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences 2 data")

                upload_button_sentences2.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences2,
                    outputs=output_data_sentences2,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                model_sts = gr.Dropdown(
                    list(MODEL_CHOICES), label="model", interactive=True)
                # presumably a minimum similarity score - verify against `sts`
                score_sts = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_sts = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### STS Results")

                # Label previously read "Semantic Textual Similarit" (truncated).
                output_sts = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Semantic Textual Similarity",
                )

        # `sts` receives (model, file 1, file 2, score) and fills the table.
        submit_button_sts.click(
            fn=sts,
            inputs=[model_sts, upload_button_sentences1, upload_button_sentences2, score_sts],
            outputs=output_sts,
        )

        download_button_sts = gr.Button("Download Results as CSV", variant="huggingface")
        download_file_sts = gr.File(label="Downloadable File")

        download_button_sts.click(
            fn=save_to_csv,
            inputs=output_sts,
            outputs=download_file_sts,
        )
|
|
|
if __name__ == "__main__":
    # Script entry point: choose the process start method, start the
    # background folder-cleanup thread, then serve the UI.
    multiprocessing.set_start_method("spawn")

    # Daemon thread, so it does not keep the interpreter alive at shutdown.
    # presumably 1800 is the cleanup interval in seconds - confirm against
    # utils.delete_folder_periodically
    cleanup_target = "data"
    cleanup_worker = threading.Thread(
        target=delete_folder_periodically,
        args=(cleanup_target, 1800),
        daemon=True,
    )
    cleanup_worker.start()

    # Print the installed Gradio version before launching.
    print(gr.__version__)
    demo.launch()
|
|