thak123 committed on
Commit
25799a7
·
verified ·
1 Parent(s): 9fe2b23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -22,11 +22,14 @@ import nltk
22
  # Download the necessary NLTK data files
23
  nltk.download('punkt_tab')
24
 
 
 
25
  # Load the Slovenian tokenizer
26
- slovenian_tokenizer = nltk.data.load('tokenizers/punkt/slovenian.pickle')
27
 
28
  def sentence_boundary_detection(text):
29
  # Tokenize the text into sentences
 
30
  sentences = slovenian_tokenizer.tokenize(text)
31
 
32
  # Count the number of sentences
 
22
  # Download the necessary NLTK data files
23
  nltk.download('punkt_tab')
24
 
25
+ from nltk.tokenize import PunktTokenizer
26
+
27
  # Load the Slovenian tokenizer
28
+ slovenian_tokenizer = PunktTokenizer("slovene")
29
 
30
  def sentence_boundary_detection(text):
31
  # Tokenize the text into sentences
32
+
33
  sentences = slovenian_tokenizer.tokenize(text)
34
 
35
  # Count the number of sentences