File size: 2,041 Bytes
a99e8a0 017a93f a99e8a0 0357372 db503ff 5560575 a99e8a0 017a93f 7f99bd8 017a93f 25799a7 017a93f 25799a7 017a93f 25799a7 017a93f a99e8a0 f5fba1f a99e8a0 f5fba1f a99e8a0 cc3314a a99e8a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import gradio as gr
from typing import List
from punctuators.models import SBDModelONNX
# Instantiate the pretrained "sbd_multi_lang" sentence-boundary-detection model.
# This will download the ONNX and SPE models. To clean up, delete this model
# from your HF cache directory.
# NOTE(review): in the code visible here, `m` is only referenced by
# sentence_boundary_detection_old — confirm it is still needed at startup.
m = SBDModelONNX.from_pretrained("sbd_multi_lang")
def sentence_boundary_detection_old(input_texts):
    """Split ``input_texts`` into sentences with the module-level model ``m``.

    The input is wrapped in a one-element batch for ``m.infer``. The raw
    batch result is printed, and only the first batch entry is used to
    build the return value.

    Args:
        input_texts: The text to segment; passed to the model as the sole
            item of a batch.

    Returns:
        A tuple ``(sentences, count)``: the detected sentences joined with
        newline characters, and the number of detected sentences.
    """
    # Run inference on a single-item batch.
    batch_output: List[List[str]] = m.infer([input_texts])
    print(batch_output)
    detected = batch_output[0]
    return "\n".join(detected), len(detected)
import nltk
# Fetch the 'punkt_tab' NLTK resource before the tokenizer below is constructed.
nltk.download('punkt_tab')
from nltk.tokenize import PunktTokenizer
# Module-level Punkt tokenizer constructed for "slovene"; used by
# sentence_boundary_detection.
slovenian_tokenizer = PunktTokenizer("slovene")
def sentence_boundary_detection(text):
    """Split ``text`` into sentences using the module-level Punkt tokenizer.

    Args:
        text: The text to segment.

    Returns:
        A tuple ``(sentences_text, sentence_count)``: the sentences joined
        with newline characters for display, and the number of sentences.
    """
    pieces = slovenian_tokenizer.tokenize(text)
    # One sentence per line, plus the total count.
    return "\n".join(pieces), len(pieces)
# Gradio interface: one multi-line text input, two outputs (the sentences,
# one per line, and how many there are).
text_input = gr.Textbox(
    label="Input Text",
    lines=10,
    placeholder="Enter text here...",
)
result_outputs = [
    gr.Textbox(
        label="Sentences",
        lines=10,
        placeholder="Sentences will appear here...",
    ),
    gr.Number(label="Number of Sentences"),
]
# Sample input offered in the UI.
# NOTE(review): this sample looks like Croatian text while the tokenizer is
# loaded for "slovene" — confirm that pairing is intended.
example_inputs = [
    "Tradicionalni 32. Hrvatski bal Austrijsko-hrvatske zajednice za kulturu i šport (AHZ), održan je u subotu navečer u Hotelu Arcotel-Wimberger u Beču. Okupio je oko 450 Hrvata iz Beča i cijele Austrije te njihove austrijske prijatelje. Brojni gosti ove godine došli su i iz Hrvatske, Njemačke i nekih drugih zemalja u kojima također žive Hrvati. Bal je otvoren je uz impresivan glazbeno scenski nastup plesnih parova poznate bečke Plesne škole Rueff.",
]
iface = gr.Interface(
    fn=sentence_boundary_detection,
    inputs=text_input,
    outputs=result_outputs,
    examples=example_inputs,
    title="Sentence Boundary Detection",
    description="Enter text to detect sentence boundaries and count the number of sentences.",
)

# Launch the Gradio app
iface.launch()
|