Spaces:

thak123
/

text-sentence-boundary-detection

Running

thak123 committed on Feb 26

Commit

25799a7

verified ·

1 Parent(s): 9fe2b23

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,11 +22,14 @@ import nltk
 # Download the necessary NLTK data files
 nltk.download('punkt_tab')
 # Load the Slovenian tokenizer
-slovenian_tokenizer = nltk.data.load('tokenizers/punkt/slovenian.pickle')
 def sentence_boundary_detection(text):
     # Tokenize the text into sentences
     sentences = slovenian_tokenizer.tokenize(text)
     # Count the number of sentences

 # Download the necessary NLTK data files
 nltk.download('punkt_tab')
+from nltk.tokenize import PunktTokenizer
 # Load the Slovenian tokenizer
+slovenian_tokenizer = PunktTokenizer("slovene")
 def sentence_boundary_detection(text):
     # Tokenize the text into sentences
     sentences = slovenian_tokenizer.tokenize(text)
     # Count the number of sentences