File size: 5,386 Bytes
1822f54 fb3abe1 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 222cf81 e9536a9 1822f54 e9536a9 1822f54 fb3abe1 1822f54 222cf81 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 fb3abe1 1822f54 77196ea e9536a9 1822f54 fb3abe1 1822f54 77196ea 1822f54 fb3abe1 1822f54 fb3abe1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import multiprocessing
import threading
import gradio as gr
from mining import mining
from sts import sts
from utils import getDataFrame, save_to_csv, delete_folder_periodically
# Value passed as `concurrency_limit` to every CSV-upload event listener below.
CONCURRENCY_LIMIT = 5

# Model identifiers offered by the "model" dropdown of both tabs.
MODEL_CHOICES = (
    "Lajavaness/bilingual-embedding-large",
    "sentence-transformers/all-mpnet-base-v2",
    "intfloat/multilingual-e5-large-instruct",
)

# NOTE(review): the indentation of this layout was not available in the copy
# under review, so the row/column nesting below is a reconstruction — confirm
# it against the rendered UI.
with gr.Blocks() as demo:
    # ------------------------------------------------------------------
    # Tab 1: paraphrase mining over a single uploaded CSV of sentences.
    # ------------------------------------------------------------------
    with gr.Tab("Paraphrase Mining"):
        with gr.Row():
            gr.Markdown(
                "### Paraphrase mining is the task of finding paraphrases (texts with identical / similar meaning) in a large corpus of sentences")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### sentences")
                upload_button_sentences = gr.UploadButton(
                    label="upload sentences csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences data"
                )
                # On upload, getDataFrame receives the uploaded file and its
                # return value fills the table above.
                upload_button_sentences.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences,
                    outputs=output_data_sentences,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                model = gr.Dropdown(list(MODEL_CHOICES), label="model", interactive=True)
                score_mining = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_mining = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                output_mining = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Mining",
                )
                download_button = gr.Button("Download Results as CSV", variant="huggingface")
                download_file = gr.File(label="Downloadable File")

        # Listeners are bound to the component objects that exist right now;
        # the names `model`, `download_button` and `download_file` are rebound
        # in the second tab, which does not affect these registrations.
        submit_button_mining.click(
            fn=mining,
            inputs=[model, upload_button_sentences, score_mining],
            outputs=output_mining,
        )
        download_button.click(
            fn=save_to_csv,
            inputs=output_mining,
            outputs=download_file,
        )

    # ------------------------------------------------------------------
    # Tab 2: semantic textual similarity between two uploaded CSVs.
    # ------------------------------------------------------------------
    with gr.Tab("Semantic Textual Similarity"):
        with gr.Row():  # Row for the title
            gr.Markdown(
                "### Semantic Textual Similarity (STS), we want to produce embeddings for all texts involved and calculate the similarities between them")

        with gr.Row():  # First row of two columns
            with gr.Column():
                gr.Markdown("#### sentences 1")
                upload_button_sentences1 = gr.UploadButton(
                    label="upload sentences 1 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences1 = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences 1 data"
                )
                upload_button_sentences1.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences1,
                    outputs=output_data_sentences1,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )
            with gr.Column():
                gr.Markdown("#### sentences 2")
                upload_button_sentences2 = gr.UploadButton(
                    label="upload sentences 2 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences2 = gr.Dataframe(
                    headers=["text"], col_count=1, label="sentences 2 data"
                )
                upload_button_sentences2.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences2,
                    outputs=output_data_sentences2,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                model = gr.Dropdown(list(MODEL_CHOICES), label="model", interactive=True)
                score_sts = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_sts = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### STS Results")
                # Label previously read "Semantic Textual Similarit" (typo).
                output_sts = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Semantic Textual Similarity",
                )
                download_button = gr.Button("Download Results as CSV", variant="huggingface")
                download_file = gr.File(label="Downloadable File")

        submit_button_sts.click(
            fn=sts,
            inputs=[model, upload_button_sentences1, upload_button_sentences2, score_sts],
            outputs=output_sts,
        )
        download_button.click(
            fn=save_to_csv,
            inputs=output_sts,
            outputs=download_file,
        )
if __name__ == "__main__":
    # Select the "spawn" start method for multiprocessing before launching.
    multiprocessing.set_start_method("spawn")

    # Background daemon thread that runs
    # delete_folder_periodically(folder_path, 1800); being a daemon, it does
    # not keep the process alive on exit.
    folder_path = "data"
    thread = threading.Thread(
        target=delete_folder_periodically,
        args=(folder_path, 1800),
        daemon=True,
    )
    thread.start()

    # Print the installed Gradio version, then start the app.
    print(gr.__version__)
    demo.launch()
|