"""Gradio demo that splits input text into sentences and counts them.

The active implementation uses NLTK's Punkt tokenizer with the Slovenian
("slovene") parameters. An older implementation based on the
``punctuators`` ONNX model is kept for reference; its model is loaded only
when that function is actually called.
"""
import functools
from typing import List, Optional, Tuple

import gradio as gr
import nltk
from nltk.tokenize import PunktTokenizer
from punctuators.models import SBDModelONNX

# Download the necessary NLTK data files
nltk.download('punkt_tab')

# Load the Slovenian tokenizer
slovenian_tokenizer = PunktTokenizer("slovene")


@functools.lru_cache(maxsize=1)
def _load_sbd_model() -> SBDModelONNX:
    """Instantiate the multilingual SBD model once and reuse it.

    This will download the ONNX and SPE models on first use. To clean up,
    delete this model from your HF cache directory.
    """
    return SBDModelONNX.from_pretrained("sbd_multi_lang")


def sentence_boundary_detection_old(input_texts):
    """Split text into sentences with the ``punctuators`` ONNX model.

    Not wired into the Gradio interface; kept as the previous implementation.

    Args:
        input_texts: A single text; it is wrapped in a one-element batch
            before being passed to the model.

    Returns:
        A ``(sentences_text, sentence_count)`` tuple, where
        ``sentences_text`` has one detected sentence per line.
    """
    # The model is loaded lazily so the app does not pay the download and
    # initialisation cost unless this legacy path is used.
    results: List[List[str]] = _load_sbd_model().infer([input_texts])
    sentences = results[0]
    return "\n".join(sentences), len(sentences)


def sentence_boundary_detection(text: Optional[str]) -> Tuple[str, int]:
    """Split text into sentences with the Slovenian Punkt tokenizer.

    Args:
        text: The text to segment. ``None`` or an empty string yields an
            empty result instead of being passed to the tokenizer.

    Returns:
        A ``(sentences_text, sentence_count)`` tuple, where
        ``sentences_text`` has one sentence per line.
    """
    if not text:
        return "", 0

    sentences = slovenian_tokenizer.tokenize(text)
    # Join sentences with newlines for display
    return "\n".join(sentences), len(sentences)


# Gradio interface
iface = gr.Interface(
    fn=sentence_boundary_detection,
    inputs=gr.Textbox(label="Input Text", lines=10, placeholder="Enter text here..."),
    outputs=[
        gr.Textbox(label="Sentences", lines=10, placeholder="Sentences will appear here..."),
        gr.Number(label="Number of Sentences"),
    ],
    examples=["Tradicionalni 32. Hrvatski bal Austrijsko-hrvatske zajednice za kulturu i šport (AHZ), održan je u subotu navečer u Hotelu Arcotel-Wimberger u Beču. Okupio je oko 450 Hrvata iz Beča i cijele Austrije te njihove austrijske prijatelje. Brojni gosti ove godine došli su i iz Hrvatske, Njemačke i nekih drugih zemalja u kojima također žive Hrvati. Bal je otvoren je uz impresivan glazbeno scenski nastup plesnih parova poznate bečke Plesne škole Rueff."],
    title="Sentence Boundary Detection",
    description="Enter text to detect sentence boundaries and count the number of sentences.",
)

# Launch the Gradio app only when executed as a script, so importing this
# module does not start a blocking web server.
if __name__ == "__main__":
    iface.launch()