boompack committed on
Commit
5be6938
·
verified ·
1 Parent(s): 55ab780

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -38
app.py CHANGED
@@ -1,11 +1,12 @@
1
  from transformers import pipeline
2
  from dataclasses import dataclass, field
3
- from typing import List, Optional, Dict, Any
4
  import re
5
  from datetime import datetime
6
  import logging
7
  import html
8
  from uuid import uuid4
 
9
 
10
  # Настройка логирования
11
  logging.basicConfig(
@@ -45,6 +46,7 @@ class InstagramCommentAnalyzer:
45
  '''
46
 
47
  def __init__(self, max_depth: int = 10, max_comment_length: int = 2200):
 
48
  self.max_depth = max_depth
49
  self.max_comment_length = max_comment_length
50
  self.pattern = re.compile(self.COMMENT_PATTERN, re.VERBOSE | re.DOTALL)
@@ -58,16 +60,37 @@ class InstagramCommentAnalyzer:
58
  'processed_mentions': 0,
59
  'processed_hashtags': 0
60
  }
61
-
62
- # Явное указание модели для анализа настроений
63
- self.sentiment_analyzer = pipeline(
64
- "sentiment-analysis",
65
- model="distilbert-base-uncased-finetuned-sst-2-english" # Выбор модели
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def analyze_sentiment(self, text: str) -> str:
69
- result = self.sentiment_analyzer(text)
70
- return result[0]['label']
 
 
 
 
71
 
72
  def normalize_text(self, text: str) -> str:
73
  text = html.unescape(text)
@@ -76,11 +99,14 @@ class InstagramCommentAnalyzer:
76
  return text
77
 
78
  def extract_metadata(self, comment: Comment) -> None:
79
- comment.mentions = re.findall(r'@(\w+)', comment.content)
80
- self.stats['processed_mentions'] += len(comment.mentions)
81
- comment.hashtags = re.findall(r'#(\w+)', comment.content)
82
- self.stats['processed_hashtags'] += len(comment.hashtags)
83
- comment.is_verified = bool(re.search(r'✓|Подтвержденный', comment.username))
 
 
 
84
 
85
  def process_comment(self, text: str, parent_id: Optional[str] = None, level: int = 0) -> Optional[Comment]:
86
  if level > self.max_depth:
@@ -112,46 +138,52 @@ class InstagramCommentAnalyzer:
112
  comment.content = comment.content[:self.max_comment_length] + "..."
113
 
114
  comment.sentiment = self.analyze_sentiment(comment.content)
115
-
116
  self.extract_metadata(comment)
117
  self.stats['total_comments'] += 1
118
  return comment
119
 
120
  except Exception as e:
121
  logger.error(f"Error processing comment: {str(e)}")
122
- comment = Comment(
 
123
  username="[damaged]",
124
  time="",
125
  content="[Поврежденные данные]",
126
  is_deleted=True
127
  )
128
- self.stats['deleted_comments'] += 1
129
- return comment
130
 
131
  def format_comment(self, comment: Comment, index: int) -> str:
132
- if comment.is_deleted:
133
- return f'{index}. "[УДАЛЕНО]" "" "" "Нравится 0"'
 
134
 
135
- return (
136
- f'{index}. "{comment.username}" "{comment.time}" '
137
- f'"{comment.content}" "Нравится {comment.likes}" "Настроение {comment.sentiment}"'
138
- )
 
 
 
139
 
140
  def process_comments(self, text: str) -> List[str]:
141
- self.stats = {key: 0 for key in self.stats}
142
- text = self.normalize_text(text)
143
- raw_comments = text.split('ОтветитьНравится')
144
- formatted_comments = []
145
- for i, raw_comment in enumerate(raw_comments, 1):
146
- if not raw_comment.strip():
147
- continue
148
-
149
- comment = self.process_comment(raw_comment)
150
- if comment:
151
- formatted_comments.append(self.format_comment(comment, i))
152
-
153
- return formatted_comments
154
-
 
 
 
 
155
  def main():
156
  example_text = """
157
  user1 2 нед. This is a positive comment! Отметки "Нравится": 25
 
1
  from transformers import pipeline
2
  from dataclasses import dataclass, field
3
+ from typing import List, Optional, Dict
4
  import re
5
  from datetime import datetime
6
  import logging
7
  import html
8
  from uuid import uuid4
9
+ import torch
10
 
11
  # Настройка логирования
12
  logging.basicConfig(
 
46
  '''
47
 
48
  def __init__(self, max_depth: int = 10, max_comment_length: int = 2200):
49
+ self.check_dependencies()
50
  self.max_depth = max_depth
51
  self.max_comment_length = max_comment_length
52
  self.pattern = re.compile(self.COMMENT_PATTERN, re.VERBOSE | re.DOTALL)
 
60
  'processed_mentions': 0,
61
  'processed_hashtags': 0
62
  }
63
+ self.sentiment_analyzer = self.load_sentiment_model()
64
+
65
def check_dependencies(self, required_packages=None):
    """Verify that the packages the analyzer relies on can be imported.

    Every package is probed before failing, so one run reports all
    missing dependencies instead of stopping at the first one.

    Args:
        required_packages: Iterable of importable module names to check.
            Defaults to ``torch``, ``transformers`` and ``numpy``.

    Raises:
        ImportError: If at least one package cannot be imported. The
            first underlying import error is attached as the cause.
    """
    import importlib  # local import keeps the method self-contained

    if required_packages is None:
        required_packages = ('torch', 'transformers', 'numpy')

    missing = []
    first_error = None
    for package in required_packages:
        try:
            importlib.import_module(package)
        except ImportError as exc:
            logger.error(f"Required package {package} is not installed")
            missing.append(package)
            if first_error is None:
                first_error = exc

    if missing:
        raise ImportError(
            "Missing required packages: " + ", ".join(missing)
        ) from first_error
73
+
74
def load_sentiment_model(self):
    """Build the sentiment-analysis pipeline used by the analyzer.

    The device is "cuda" when ``torch.cuda.is_available()`` is true and
    "cpu" otherwise; the choice is logged before the pipeline is built.

    Returns:
        The object returned by ``pipeline("sentiment-analysis", ...)``
        for the ``distilbert-base-uncased-finetuned-sst-2-english`` model.

    Raises:
        Exception: Any error raised while selecting the device or
            building the pipeline is logged and then re-raised unchanged.
    """
    try:
        if torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"
        logger.info(f"Using device: {device}")

        pipeline_options = {
            "model": "distilbert-base-uncased-finetuned-sst-2-english",
            "device": device,
        }
        sentiment_pipeline = pipeline("sentiment-analysis", **pipeline_options)
        return sentiment_pipeline
    except Exception as error:
        logger.error(f"Model loading failed: {str(error)}")
        raise
86
 
87
  def analyze_sentiment(self, text: str) -> str:
88
+ try:
89
+ result = self.sentiment_analyzer(text)
90
+ return result[0]['label']
91
+ except Exception as e:
92
+ logger.error(f"Sentiment analysis failed: {str(e)}")
93
+ return "UNKNOWN"
94
 
95
  def normalize_text(self, text: str) -> str:
96
  text = html.unescape(text)
 
99
  return text
100
 
101
  def extract_metadata(self, comment: Comment) -> None:
102
+ try:
103
+ comment.mentions = re.findall(r'@(\w+)', comment.content)
104
+ self.stats['processed_mentions'] += len(comment.mentions)
105
+ comment.hashtags = re.findall(r'#(\w+)', comment.content)
106
+ self.stats['processed_hashtags'] += len(comment.hashtags)
107
+ comment.is_verified = bool(re.search(r'✓|Подтвержденный', comment.username))
108
+ except Exception as e:
109
+ logger.error(f"Metadata extraction failed: {str(e)}")
110
 
111
  def process_comment(self, text: str, parent_id: Optional[str] = None, level: int = 0) -> Optional[Comment]:
112
  if level > self.max_depth:
 
138
  comment.content = comment.content[:self.max_comment_length] + "..."
139
 
140
  comment.sentiment = self.analyze_sentiment(comment.content)
 
141
  self.extract_metadata(comment)
142
  self.stats['total_comments'] += 1
143
  return comment
144
 
145
  except Exception as e:
146
  logger.error(f"Error processing comment: {str(e)}")
147
+ self.stats['deleted_comments'] += 1
148
+ return Comment(
149
  username="[damaged]",
150
  time="",
151
  content="[Поврежденные данные]",
152
  is_deleted=True
153
  )
 
 
154
 
155
  def format_comment(self, comment: Comment, index: int) -> str:
156
+ try:
157
+ if comment.is_deleted:
158
+ return f'{index}. "[УДАЛЕНО]" "" "" "Нравится 0"'
159
 
160
+ return (
161
+ f'{index}. "{comment.username}" "{comment.time}" '
162
+ f'"{comment.content}" "Нравится {comment.likes}" "Настроени�� {comment.sentiment}"'
163
+ )
164
+ except Exception as e:
165
+ logger.error(f"Error formatting comment: {str(e)}")
166
+ return f'{index}. "[ОШИБКА ФОРМАТИРОВАНИЯ]"'
167
 
168
  def process_comments(self, text: str) -> List[str]:
169
+ try:
170
+ self.stats = {key: 0 for key in self.stats}
171
+ text = self.normalize_text(text)
172
+ raw_comments = text.split('ОтветитьНравится')
173
+ formatted_comments = []
174
+
175
+ for i, raw_comment in enumerate(raw_comments, 1):
176
+ if not raw_comment.strip():
177
+ continue
178
+
179
+ comment = self.process_comment(raw_comment)
180
+ if comment:
181
+ formatted_comments.append(self.format_comment(comment, i))
182
+
183
+ return formatted_comments
184
+ except Exception as e:
185
+ logger.error(f"Error processing comments: {str(e)}")
186
+ return ["[ОШИБКА ОБРАБОТКИ КОММЕНТАРИЕВ]"]
187
  def main():
188
  example_text = """
189
  user1 2 нед. This is a positive comment! Отметки "Нравится": 25