File size: 2,041 Bytes
a99e8a0
 
 
 
 
 
 
 
 
 
 
017a93f
a99e8a0
0357372
db503ff
5560575
 
a99e8a0
 
017a93f
 
 
7f99bd8
017a93f
25799a7
 
017a93f
25799a7
017a93f
 
 
25799a7
017a93f
 
 
 
 
 
 
 
 
 
a99e8a0
 
 
f5fba1f
a99e8a0
f5fba1f
a99e8a0
 
cc3314a
a99e8a0
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio as gr


from typing import List

from punctuators.models import SBDModelONNX

# Instantiate the pretrained "sbd_multi_lang" sentence-boundary model once, at import time.
# This will download the ONNX and SPE models. To clean up, delete this model from your HF cache directory.
# NOTE(review): in the visible code only sentence_boundary_detection_old() reads `m`;
# the Gradio interface is wired to the NLTK-based function — confirm this load is still needed.
m = SBDModelONNX.from_pretrained("sbd_multi_lang")

def sentence_boundary_detection_old(input_texts):
    """Split one text into sentences using the module-level model ``m``.

    The text is wrapped in a single-item batch for ``m.infer``; the raw
    batch result is printed, and only the first entry is used.

    Returns:
        A pair ``(sentences, count)``: the sentences of the first batch entry
        joined by newlines, and how many sentences there are.
    """
    # The model works on batches, so send a batch of one.
    batch_output: List[List[str]] = m.infer([input_texts])
    print(batch_output)
    first_entry = batch_output[0]
    return "\n".join(first_entry), len(first_entry)


import nltk

# Download the necessary NLTK data files (the 'punkt_tab' resource).
# NOTE(review): this call runs on every start-up; presumably the tokenizer
# constructed below reads this data, so the download has to come first — confirm.
nltk.download('punkt_tab')

from nltk.tokenize import PunktTokenizer

# Load the Slovenian tokenizer once at module level; sentence_boundary_detection()
# reuses this single instance on every call.
slovenian_tokenizer = PunktTokenizer("slovene")

def sentence_boundary_detection(text):
    """Split *text* into sentences with the module-level Slovenian tokenizer.

    Args:
        text: The text to split. ``None`` or an empty string is treated as
            "no input" and is not passed to the tokenizer.

    Returns:
        A pair ``(sentences_text, sentence_count)``: the detected sentences
        joined by newlines for display, and the number of sentences.
    """
    # Guard against a missing value (e.g. a cleared textbox or a null API
    # payload): the tokenizer only accepts strings, so answer directly.
    if not text:
        return "", 0

    # Tokenize the text into sentences
    sentences = slovenian_tokenizer.tokenize(text)

    # Join sentences with newlines for display, and report how many there are
    return "\n".join(sentences), len(sentences)
    
# Gradio interface
# Build the individual components first, then wire them to the splitter function.
text_input = gr.Textbox(label="Input Text", lines=10, placeholder="Enter text here...")
sentences_output = gr.Textbox(label="Sentences", lines=10, placeholder="Sentences will appear here...")
count_output = gr.Number(label="Number of Sentences")

# Sample paragraph offered as a one-click example in the UI.
example_texts = [
    "Tradicionalni 32. Hrvatski bal Austrijsko-hrvatske zajednice za kulturu i šport (AHZ), održan je u subotu navečer u Hotelu Arcotel-Wimberger u Beču. Okupio je oko 450 Hrvata iz Beča i cijele Austrije te njihove austrijske prijatelje. Brojni gosti ove godine došli su i iz Hrvatske, Njemačke i nekih drugih zemalja u kojima također žive Hrvati. Bal je otvoren je uz impresivan glazbeno scenski nastup plesnih parova poznate bečke Plesne škole Rueff.",
]

iface = gr.Interface(
    fn=sentence_boundary_detection,
    inputs=text_input,
    outputs=[sentences_output, count_output],
    examples=example_texts,
    title="Sentence Boundary Detection",
    description="Enter text to detect sentence boundaries and count the number of sentences.",
)

# Launch the Gradio app
iface.launch()