albertmartinez's picture
update mining
222cf81
import multiprocessing
import threading
import gradio as gr
from mining import mining
from sts import sts
from utils import getDataFrame, save_to_csv, delete_folder_periodically
# Value passed as `concurrency_limit` to the CSV-upload event handlers below.
CONCURRENCY_LIMIT = 5

# Model identifiers offered by the "model" dropdown in both tabs. Kept in one
# place so the two tabs cannot drift apart.
MODEL_CHOICES = [
    "Lajavaness/bilingual-embedding-large",
    "sentence-transformers/all-mpnet-base-v2",
    "intfloat/multilingual-e5-large-instruct",
]

# Two-tab Gradio UI:
#   * "Paraphrase Mining"           - one CSV upload, results produced by `mining`.
#   * "Semantic Textual Similarity" - two CSV uploads, results produced by `sts`.
# Each tab previews the uploaded file(s) via `getDataFrame`, exposes a model
# dropdown and a numeric "score" input, shows the result table, and offers a
# CSV export through `save_to_csv`.
#
# NOTE: `model`, `download_button` and `download_file` are bound once per tab
# and therefore rebound by the second tab. This is safe because every event
# registration receives the component objects at the moment it is called.
with gr.Blocks() as demo:
    with gr.Tab("Paraphrase Mining"):
        with gr.Row():
            gr.Markdown(
                "### Paraphrase mining is the task of finding paraphrases (texts with identical / similar meaning) in a large corpus of sentences")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### sentences")
                upload_button_sentences = gr.UploadButton(
                    label="upload sentences csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences = gr.Dataframe(headers=["text"], col_count=1, label="sentences data")
                # Show the uploaded file in the table right after the upload.
                upload_button_sentences.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences,
                    outputs=output_data_sentences,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                model = gr.Dropdown(MODEL_CHOICES, label="model", interactive=True)
                score_mining = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_mining = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                output_mining = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Mining",
                )

        # The upload button itself is the input, so `mining` receives the
        # uploaded file rather than the preview table.
        submit_button_mining.click(
            fn=mining,
            inputs=[model, upload_button_sentences, score_mining],
            outputs=output_mining,
        )

        download_button = gr.Button("Download Results as CSV", variant="huggingface")
        download_file = gr.File(label="Downloadable File")
        download_button.click(
            fn=save_to_csv,
            inputs=output_mining,
            outputs=download_file,
        )

    with gr.Tab("Semantic Textual Similarity"):
        with gr.Row():  # Row for the title
            gr.Markdown(
                "### Semantic Textual Similarity (STS), we want to produce embeddings for all texts involved and calculate the similarities between them")

        with gr.Row():  # First row of two columns
            with gr.Column():
                gr.Markdown("#### sentences 1")
                upload_button_sentences1 = gr.UploadButton(
                    label="upload sentences 1 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences1 = gr.Dataframe(headers=["text"], col_count=1, label="sentences 1 data")
                upload_button_sentences1.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences1,
                    outputs=output_data_sentences1,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

            with gr.Column():
                gr.Markdown("#### sentences 2")
                upload_button_sentences2 = gr.UploadButton(
                    label="upload sentences 2 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences2 = gr.Dataframe(headers=["text"], col_count=1, label="sentences 2 data")
                upload_button_sentences2.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences2,
                    outputs=output_data_sentences2,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                model = gr.Dropdown(MODEL_CHOICES, label="model", interactive=True)
                score_sts = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_sts = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### STS Results")
                output_sts = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Semantic Textual Similarity",
                )

        # Both upload buttons are passed as inputs, so `sts` receives the two
        # uploaded files rather than the preview tables.
        submit_button_sts.click(
            fn=sts,
            inputs=[model, upload_button_sentences1, upload_button_sentences2, score_sts],
            outputs=output_sts,
        )

        download_button = gr.Button("Download Results as CSV", variant="huggingface")
        download_file = gr.File(label="Downloadable File")
        download_button.click(
            fn=save_to_csv,
            inputs=output_sts,
            outputs=download_file,
        )
if __name__ == "__main__":
    # Script entry point: pick the process start method, start the background
    # folder clean-up helper, then serve the UI.
    multiprocessing.set_start_method("spawn")

    cleanup_target = "data"
    # Second argument handed to delete_folder_periodically; presumably an
    # interval in seconds - confirm against utils.
    cleanup_interval = 1800

    # Daemon thread, so it does not keep the interpreter alive on shutdown.
    janitor = threading.Thread(
        target=delete_folder_periodically,
        args=(cleanup_target, cleanup_interval),
        daemon=True,
    )
    janitor.start()

    # Print the installed Gradio version before launching the app.
    print(gr.__version__)
    demo.launch()