"""Gradio demo with two tabs: Paraphrase Mining and Semantic Textual Similarity.

Each tab lets the user upload CSV file(s), pick an embedding model and a score
value, run the corresponding project function (``mining`` or ``sts``) and
export the resulting table through ``save_to_csv``.
"""

import multiprocessing
import threading

import gradio as gr

from mining import mining
from sts import sts
from utils import getDataFrame, save_to_csv, delete_folder_periodically

# Concurrency limit passed to the CSV upload event handlers.
CONCURRENCY_LIMIT = 5

# Embedding models offered in the "model" dropdown of both tabs.
MODEL_CHOICES = (
    "Lajavaness/bilingual-embedding-large",
    "sentence-transformers/all-mpnet-base-v2",
    "intfloat/multilingual-e5-large-instruct",
)

# NOTE(review): the nesting of components inside rows/columns was reconstructed
# from the statement order; confirm the rendered layout is the intended one.
with gr.Blocks() as demo:
    with gr.Tab("Paraphrase Mining"):
        with gr.Row():
            gr.Markdown(
                "### Paraphrase mining is the task of finding paraphrases (texts with identical / similar meaning) in a large corpus of sentences")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### sentences")
                upload_button_sentences = gr.UploadButton(
                    label="upload sentences csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences = gr.Dataframe(
                    headers=["text"],
                    col_count=1,
                    label="sentences data",
                )
                # The uploaded file is handed to getDataFrame and its result is
                # shown in the table above (presumably a preview of the CSV).
                upload_button_sentences.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences,
                    outputs=output_data_sentences,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                model = gr.Dropdown(
                    list(MODEL_CHOICES),
                    label="model",
                    interactive=True,
                )
                score_mining = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_mining = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                output_mining = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Mining",
                )
                submit_button_mining.click(
                    fn=mining,
                    inputs=[model, upload_button_sentences, score_mining],
                    outputs=output_mining,
                )

                download_button = gr.Button("Download Results as CSV", variant="huggingface")
                download_file = gr.File(label="Downloadable File")
                download_button.click(
                    fn=save_to_csv,
                    inputs=output_mining,
                    outputs=download_file,
                )

    with gr.Tab("Semantic Textual Similarity"):
        with gr.Row():  # Row for the title
            gr.Markdown(
                "### Semantic Textual Similarity (STS), we want to produce embeddings for all texts involved and calculate the similarities between them")

        with gr.Row():  # First row of two columns
            with gr.Column():
                gr.Markdown("#### sentences 1")
                upload_button_sentences1 = gr.UploadButton(
                    label="upload sentences 1 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences1 = gr.Dataframe(
                    headers=["text"],
                    col_count=1,
                    label="sentences 1 data",
                )
                upload_button_sentences1.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences1,
                    outputs=output_data_sentences1,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

            with gr.Column():
                gr.Markdown("#### sentences 2")
                upload_button_sentences2 = gr.UploadButton(
                    label="upload sentences 2 csv",
                    file_types=['.csv'],
                    file_count="single",
                )
                output_data_sentences2 = gr.Dataframe(
                    headers=["text"],
                    col_count=1,
                    label="sentences 2 data",
                )
                upload_button_sentences2.upload(
                    fn=getDataFrame,
                    inputs=upload_button_sentences2,
                    outputs=output_data_sentences2,
                    concurrency_limit=CONCURRENCY_LIMIT,
                )

        with gr.Row():
            with gr.Column():
                # `model`, `download_button` and `download_file` are rebound
                # for this tab. The first tab's handlers are unaffected: they
                # received the component objects when `.click()` was called.
                model = gr.Dropdown(
                    list(MODEL_CHOICES),
                    label="model",
                    interactive=True,
                )
                score_sts = gr.Number(label="score", value=0.96, interactive=True)
                submit_button_sts = gr.Button("Submit", variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("#### STS Results")
                output_sts = gr.Dataframe(
                    headers=["score", "sentence_1", "sentence_2"],
                    type="polars",
                    label="Semantic Textual Similarity",
                )
                submit_button_sts.click(
                    fn=sts,
                    inputs=[model, upload_button_sentences1, upload_button_sentences2, score_sts],
                    outputs=output_sts,
                )

                download_button = gr.Button("Download Results as CSV", variant="huggingface")
                download_file = gr.File(label="Downloadable File")
                download_button.click(
                    fn=save_to_csv,
                    inputs=output_sts,
                    outputs=download_file,
                )

if __name__ == "__main__":
    # Select the "spawn" start method before any worker process is created.
    multiprocessing.set_start_method("spawn")

    # Background cleanup of the "data" folder. The thread is a daemon so it
    # never keeps the interpreter alive after the app exits.
    # NOTE(review): 1800 is presumably the interval in seconds — confirm
    # against utils.delete_folder_periodically.
    folder_path = "data"
    thread = threading.Thread(
        target=delete_folder_periodically,
        args=(folder_path, 1800),
        daemon=True,
    )
    thread.start()

    print(gr.__version__)
    demo.launch()