Till Fischer
committed on
Commit
·
240324f
1
Parent(s):
cac53d2
Fix NLTK punkt setup and remove redundant downloads
Browse files
- analyze_aspects.py +6 -5
analyze_aspects.py
CHANGED
@@ -12,6 +12,11 @@ import nltk
|
|
12 |
from transformers import pipeline
|
13 |
from collections import defaultdict
|
14 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def visualize_aspects(aspect_results: dict[str, list[float]], output_dir: Path, filename: str = "sentiment_aspekte.png"):
|
17 |
output_dir.mkdir(parents=True, exist_ok=True)
|
@@ -118,11 +123,7 @@ def analyze_quickwin(db_path: Path, isbn: str, device: int = -1, languages: list
|
|
118 |
continue
|
119 |
|
120 |
logger.info(f"Review ID {review_id} ({lang}) wird verarbeitet.")
|
121 |
-
|
122 |
-
nltk.download('punkt')
|
123 |
-
nltk.data.path.append("/home/user/nltk_data")
|
124 |
-
os.environ["NLTK_DATA"] = "/home/user/nltk_data"
|
125 |
-
|
126 |
lang_map = {'de': 'german', 'en': 'english'}
|
127 |
sentences = sent_tokenize(text, language=lang_map.get(lang, 'english'))
|
128 |
|
|
|
12 |
from transformers import pipeline
|
13 |
from collections import defaultdict
|
14 |
import matplotlib.pyplot as plt
|
15 |
+
import os
|
16 |
+
|
17 |
+
nltk.download('punkt', download_dir='/home/user/nltk_data')
|
18 |
+
nltk.data.path.append('/home/user/nltk_data')
|
19 |
+
os.environ['NLTK_DATA'] = '/home/user/nltk_data'
|
20 |
|
21 |
def visualize_aspects(aspect_results: dict[str, list[float]], output_dir: Path, filename: str = "sentiment_aspekte.png"):
|
22 |
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
123 |
continue
|
124 |
|
125 |
logger.info(f"Review ID {review_id} ({lang}) wird verarbeitet.")
|
126 |
+
|
|
|
|
|
|
|
|
|
127 |
lang_map = {'de': 'german', 'en': 'english'}
|
128 |
sentences = sent_tokenize(text, language=lang_map.get(lang, 'english'))
|
129 |
|