Spaces:

thak123
/

text-sentence-boundary-detection

Running

App Files Files Community

text-sentence-boundary-detection / app.py

thak123

Update app.py

cc3314a verified 1 day ago

raw

history blame contribute delete

2.04 kB

	import gradio as gr


	from typing import List

	from punctuators.models import SBDModelONNX

	# Load the pretrained "sbd_multi_lang" SBD model once, at module import time.
	# This will download the ONNX and SPE models. To clean up, delete this model from your HF cache directory.
	# NOTE(review): in the visible code `m` is only used by sentence_boundary_detection_old,
	# which is not the function passed to the Gradio interface — confirm this load is still needed.
	m = SBDModelONNX.from_pretrained("sbd_multi_lang")

	def sentence_boundary_detection_old(input_texts):
		"""Split text into sentences using the module-level SBD model ``m``.

		The input is wrapped in a one-element batch for ``m.infer`` and only the
		first entry of the batched result is used.

		Returns a tuple ``(sentences, count)``: the detected sentences joined by
		newline characters, and the number of detected sentences.
		"""
		batch_output: List[List[str]] = m.infer([input_texts])
		# Echo the raw batched result to stdout.
		print(batch_output)
		detected = batch_output[0]
		return "\n".join(detected), len(detected)


	import nltk

	# Fetch the 'punkt_tab' NLTK resource at startup, before the tokenizer below is
	# constructed (PunktTokenizer is expected to load its language data from it).
	nltk.download('punkt_tab')

	from nltk.tokenize import PunktTokenizer

	# Build the Punkt sentence tokenizer for the "slovene" language once at module level;
	# sentence_boundary_detection reuses this single instance on every call.
	# NOTE(review): the example text passed to the Gradio interface appears to be Croatian,
	# while the Slovene model is loaded here — confirm this pairing is intended.
	slovenian_tokenizer = PunktTokenizer("slovene")

	def sentence_boundary_detection(text):
		"""Split ``text`` into sentences with the module-level Punkt tokenizer.

		Returns a tuple ``(sentences_text, sentence_count)``: the sentences joined
		by newline characters for display, and the number of sentences found.
		"""
		pieces = slovenian_tokenizer.tokenize(text)
		# One sentence per line in the display string; the count goes alongside it.
		return "\n".join(pieces), len(pieces)

	# Gradio interface
	# Components are created up front, in the same order the original call built them.
	_input_box = gr.Textbox(label="Input Text", lines=10, placeholder="Enter text here...")
	_sentences_box = gr.Textbox(label="Sentences", lines=10, placeholder="Sentences will appear here...")
	_count_box = gr.Number(label="Number of Sentences")

	# The handler returns (sentences_text, sentence_count); the two output components
	# are listed in that same order.
	iface = gr.Interface(
		fn=sentence_boundary_detection,
		inputs=_input_box,
		outputs=[_sentences_box, _count_box],
		examples=["Tradicionalni 32. Hrvatski bal Austrijsko-hrvatske zajednice za kulturu i šport (AHZ), održan je u subotu navečer u Hotelu Arcotel-Wimberger u Beču. Okupio je oko 450 Hrvata iz Beča i cijele Austrije te njihove austrijske prijatelje. Brojni gosti ove godine došli su i iz Hrvatske, Njemačke i nekih drugih zemalja u kojima također žive Hrvati. Bal je otvoren je uz impresivan glazbeno scenski nastup plesnih parova poznate bečke Plesne škole Rueff."],
		title="Sentence Boundary Detection",
		description="Enter text to detect sentence boundaries and count the number of sentences.",
	)

	# Launch the Gradio app
	iface.launch()