Till Fischer committed on
Commit
240324f
·
1 Parent(s): cac53d2

Fix NLTK punkt setup and remove redundant downloads

Browse files
Files changed (1) hide show
  1. analyze_aspects.py +6 -5
analyze_aspects.py CHANGED
@@ -12,6 +12,11 @@ import nltk
12
  from transformers import pipeline
13
  from collections import defaultdict
14
  import matplotlib.pyplot as plt
 
 
 
 
 
15
 
16
  def visualize_aspects(aspect_results: dict[str, list[float]], output_dir: Path, filename: str = "sentiment_aspekte.png"):
17
  output_dir.mkdir(parents=True, exist_ok=True)
@@ -118,11 +123,7 @@ def analyze_quickwin(db_path: Path, isbn: str, device: int = -1, languages: list
118
  continue
119
 
120
  logger.info(f"Review ID {review_id} ({lang}) wird verarbeitet.")
121
- import os
122
- nltk.download('punkt')
123
- nltk.data.path.append("/home/user/nltk_data")
124
- os.environ["NLTK_DATA"] = "/home/user/nltk_data"
125
-
126
  lang_map = {'de': 'german', 'en': 'english'}
127
  sentences = sent_tokenize(text, language=lang_map.get(lang, 'english'))
128
 
 
12
  from transformers import pipeline
13
  from collections import defaultdict
14
  import matplotlib.pyplot as plt
15
+ import os
16
+
17
+ nltk.download('punkt', download_dir='/home/user/nltk_data')
18
+ nltk.data.path.append('/home/user/nltk_data')
19
+ os.environ['NLTK_DATA'] = '/home/user/nltk_data'
20
 
21
  def visualize_aspects(aspect_results: dict[str, list[float]], output_dir: Path, filename: str = "sentiment_aspekte.png"):
22
  output_dir.mkdir(parents=True, exist_ok=True)
 
123
  continue
124
 
125
  logger.info(f"Review ID {review_id} ({lang}) wird verarbeitet.")
126
+
 
 
 
 
127
  lang_map = {'de': 'german', 'en': 'english'}
128
  sentences = sent_tokenize(text, language=lang_map.get(lang, 'english'))
129