# Hugging Face Space: Yehor/punctuation-uk (status: Sleeping)
# File size: 5,331 Bytes

import sys
import time

import gradio as gr

from nemo import __version__ as nemo_version
from nemo.collections.nlp.models import PunctuationCapitalizationModel

# Config
# Hugging Face model id handed to `from_pretrained` below.
model_name = "dchaplinsky/punctuation_uk_bert"
# Passed as `concurrency_limit` to the Enhance button's click handler.
concurrency_limit = 5

# Load the model
# Done once at import time; `inference` reads this module-level instance.
model = PunctuationCapitalizationModel.from_pretrained(model_name)

# Sample inputs offered through `gr.Examples`: lowercase Ukrainian text
# without punctuation, i.e. the kind of input the app is meant to enhance.
examples = [
    "тема про яку не люблять говорити офіційні джерела у генштабі і міноборони це хімічна зброя окупанти вже тривалий час використовують хімічну зброю заборонену",
    "всіма конвенціями якщо спочатку це були гранати з дронів то тепер фіксують випадки застосування",
    "хімічних снарядів причому склад отруйної речовони різний а отже й наслідки для наших військових теж різні",
    "використовує на фронті все що має і хімічна зброя не вийняток тож з чим маємо справу розбиралася марія моганисян",
    "двох тисяч випадків застосування росіянами боєприпасів споряджених небезпечними хімічними речовинами",
    "на всі писані норми марія моганисян олександр моторний спецкор марафон єдині новини",
]

# Page title; also used as the top-level heading of the description.
title = "Restore Punctuation and Capitalization for Ukrainian"

# Markdown table with the author's contact links.
# https://www.tablesgenerator.com/markdown_tables
# NOTE(review): the last row reads "[email protected]", which looks like an
# e-mail address masked by the hosting page - confirm and restore the real one.
authors_table = """
## Authors

Follow them on social networks and **contact** if you need any help or have any questions:

| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
|-------------------------------------------------------------------------------------------------|
| https://t.me/smlkw in Telegram                                                                  |
| https://x.com/yehor_smoliakov at X                                                              |
| https://github.com/egorsmkv at GitHub                                                           |
| https://huggingface.co/Yehor at Hugging Face                                                    |
| or use [email protected]                                                                       |
""".strip()

# Intro shown above the input widgets. The model link is built from
# `model_name` so the text cannot drift from the model that is actually loaded.
description_head = f"""
# {title}

## Overview

This space uses https://huggingface.co/{model_name} model.

Paste the text you want to enhance.
""".strip()

# Footer shown below the examples; `authors_table` is already stripped, so it
# is reused as-is.
description_foot = authors_table

# Placeholder of the output textbox until the first result arrives.
enhanced_text_value = """
Enhanced text will appear here.

Choose **an example** below the Enhance button or paste **your text**.
""".strip()

# Runtime details rendered at the bottom of the page.
tech_env = f"""
#### Environment

- Python: {sys.version}
""".strip()

tech_libraries = f"""
#### Libraries

- nemo: {nemo_version}
- gradio: {gr.__version__}
""".strip()


def inference(text, progress=gr.Progress()):
    """Restore punctuation and capitalization in ``text``.

    Args:
        text: Raw text taken from the "Text" input box.
        progress: Gradio progress tracker; Gradio supplies it through the
            default value, callers do not pass it.

    Returns:
        A string with one ``> <enhanced text>`` entry per processed sentence,
        followed by an ``Elapsed time: <seconds> seconds`` footer.

    Raises:
        gr.Error: If ``text`` is empty or consists only of whitespace.
    """
    # Whitespace-only input must be rejected here as well; otherwise the loop
    # below skips it and the user gets nothing but the elapsed-time footer.
    if not text or not text.strip():
        raise gr.Error("Please paste your text.")

    gr.Info("Starting enhancing", duration=2)

    progress(0, desc="Enhancing...")

    results = []

    # The whole input is processed as a single "sentence"; the loop is kept so
    # the progress bar and the result format stay the same if splitting is added.
    sentences = [text]

    for sentence in progress.tqdm(sentences, desc="Enhancing...", unit="sentence"):
        sentence = sentence.strip()

        if not sentence:
            continue

        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments.
        t0 = time.perf_counter()

        predictions = model.add_punctuation_capitalization([sentence])

        elapsed_time = round(time.perf_counter() - t0, 2)

        # Keep `predictions` a list of strings so the join below is well-typed.
        if not predictions:
            predictions = ["-"]

        enhanced_text = "\n".join(predictions).strip()

        # Always report the result, even when the model leaves the text
        # unchanged - otherwise the output box would contain no text at all.
        results.append(
            {
                "sentence": sentence,
                "enhanced_text": enhanced_text,
                "elapsed_time": elapsed_time,
            }
        )

    gr.Info("Finished!", duration=2)

    result_texts = []

    for result in results:
        result_texts.append(f'> {result["enhanced_text"]}')
        result_texts.append("\n")

    # Re-round the total: adding already-rounded floats can produce artifacts
    # such as 0.30000000000000004.
    sum_elapsed_text = round(sum(result["elapsed_time"] for result in results), 2)
    result_texts.append(f"Elapsed time: {sum_elapsed_text} seconds")

    return "\n".join(result_texts)


# Assemble the page: intro, input/output row, Enhance button, examples,
# author footer and the runtime/library versions.
with gr.Blocks(
    title=title,
    analytics_enabled=False,
    theme=gr.themes.Base(),
) as demo:
    gr.Markdown(description_head)
    gr.Markdown("## Usage")

    # Input on the left, enhanced output on the right.
    with gr.Row():
        text = gr.Textbox(label="Text", autofocus=True, max_lines=1)
        enhanced_text = gr.Textbox(
            label="Enhanced text",
            placeholder=enhanced_text_value,
            show_copy_button=True,
        )

    # Clicking the button runs `inference` on the input box and writes the
    # returned string into the output box.
    enhance_button = gr.Button("Enhance")
    enhance_button.click(
        fn=inference,
        inputs=text,
        outputs=enhanced_text,
        concurrency_limit=concurrency_limit,
    )

    with gr.Row():
        gr.Examples(examples=examples, inputs=text, label="Choose an example")

    gr.Markdown(description_foot)

    # Technical footer: heading, Python version, library versions.
    for section in ("### Gradio app uses:", tech_env, tech_libraries):
        gr.Markdown(section)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.queue()
    demo.launch()