boompack commited on
Commit
f508547
·
verified ·
1 Parent(s): 8c8dfe8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +438 -213
app.py CHANGED
@@ -1,307 +1,532 @@
1
- # analyzers.py
2
  import re
3
  import emoji
4
  import statistics
5
  from collections import Counter
6
- from typing import Dict, List, Tuple, Optional
7
  import logging
8
- from io import StringIO
 
9
  import csv
 
 
 
10
 
11
- logging.basicConfig(level=logging.INFO)
 
 
 
 
 
 
 
 
 
 
12
  logger = logging.getLogger(__name__)
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  class TextAnalyzer:
15
- """Класс для базового анализа текста"""
 
16
  @staticmethod
17
  def clean_text(text: str) -> str:
18
- return re.sub(r'\s+', ' ', text).strip()
 
19
 
20
  @staticmethod
21
  def count_emojis(text: str) -> int:
22
- return len([c for c in text if c in emoji.EMOJI_DATA])
 
23
 
24
  @staticmethod
25
- def extract_mentions(text: str) -> List[str]:
26
- return re.findall(r'@[\w\.]+', text)
 
27
 
28
  @staticmethod
29
  def get_words(text: str) -> List[str]:
30
- return [w for w in re.findall(r'\w+', text.lower()) if len(w) > 2]
 
31
 
32
  class SentimentAnalyzer:
33
- """Класс для анализа тональности"""
34
- POSITIVE_INDICATORS = {
35
- 'emoji': ['🔥', '❤️', '👍', '😊', '💪', '👏', '🎉', '♥️', '😍', '🙏'],
36
- 'words': ['круто', 'супер', 'класс', 'огонь', 'пушка', 'отлично', 'здорово',
37
- 'прекрасно', 'молодец', 'красота', 'спасибо', 'топ', 'лучший',
38
- 'amazing', 'wonderful', 'great', 'perfect', 'love', 'beautiful']
39
- }
40
 
41
- NEGATIVE_INDICATORS = {
42
- 'emoji': ['👎', '😢', '😞', '😠', '😡', '💔', '😕', '😑'],
43
- 'words': ['плохо', 'ужас', 'отстой', 'фу', 'жесть', 'ужасно',
44
- 'разочарован', 'печаль', 'грустно', 'bad', 'worst',
45
- 'terrible', 'awful', 'sad', 'disappointed']
 
 
 
 
 
 
 
 
 
46
  }
47
 
48
  @classmethod
49
- def analyze(cls, text: str) -> str:
 
 
 
50
  text_lower = text.lower()
51
- pos_count = sum(1 for ind in cls.POSITIVE_INDICATORS['emoji'] + cls.POSITIVE_INDICATORS['words']
52
- if ind in text_lower)
53
- neg_count = sum(1 for ind in cls.NEGATIVE_INDICATORS['emoji'] + cls.NEGATIVE_INDICATORS['words']
54
- if ind in text_lower)
55
 
56
- exclamation_boost = text.count('!') * 0.5
 
 
 
 
 
 
57
  if pos_count > neg_count:
58
- pos_count += exclamation_boost
59
  elif neg_count > pos_count:
60
- neg_count += exclamation_boost
61
 
62
- return 'positive' if pos_count > neg_count else 'negative' if neg_count > pos_count else 'neutral'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  class CommentExtractor:
65
- """Класс для извлечения данных из комментариев"""
 
 
 
 
 
 
66
  PATTERNS = {
67
- 'username': [
68
- r"Фото профиля ([^\n]+)",
69
- r"^([^\s]+)\s+",
70
- r"@([^\s]+)\s+"
71
- ],
72
- 'time': [
73
- r"(\d+)\s*(?:ч|нед)\.",
74
- r"(\d+)\s*(?:h|w)",
75
- r"(\d+)\s*(?:час|hour|week)"
76
- ],
77
- 'likes': [
78
- r"(\d+) отметк[аи] \"Нравится\"",
79
- r"Нравится: (\d+)",
80
- r"(\d+) отметка \"Нравится\"",
81
- r"\"Нравится\": (\d+)",
82
- r"likes?: (\d+)"
83
- ],
84
- 'metadata': [
85
- r"Фото профиля [^\n]+\n",
86
- r"\d+\s*(?:ч|нед|h|w|час|hour|week)\.",
87
- r"Нравится:?\s*\d+",
88
- r"\d+ отметк[аи] \"Нравится\"",
89
- r"Ответить",
90
- r"Показать перевод",
91
- r"Скрыть все ответы",
92
- r"Смотреть все ответы \(\d+\)"
93
- ]
 
 
 
 
 
94
  }
95
 
96
  @classmethod
97
- def extract_data(cls, comment_text: str) -> Tuple[Optional[str], Optional[str], int, float]:
 
98
  try:
99
- # Извлечение имени пользователя
100
- username = None
101
- for pattern in cls.PATTERNS['username']:
102
- if match := re.search(pattern, comment_text):
103
- username = match.group(1).strip()
104
- break
105
-
106
- if not username:
107
- return None, None, 0, 0
108
 
109
- # Очистка комментария
110
- comment = comment_text
111
- for pattern in cls.PATTERNS['metadata'] + [username]:
112
- comment = re.sub(pattern, '', comment)
 
 
 
113
  comment = TextAnalyzer.clean_text(comment)
114
 
115
- # Извлечение времени
116
- weeks = 0
117
- for pattern in cls.PATTERNS['time']:
118
- if match := re.search(pattern, comment_text):
119
- time_value = int(match.group(1))
120
- if any(unit in comment_text.lower() for unit in ['нед', 'w', 'week']):
121
- weeks = time_value
122
- else:
123
- weeks = time_value / (24 * 7)
124
- break
 
 
 
 
 
 
 
 
125
 
126
- # Извлечение лайков
127
- likes = 0
128
- for pattern in cls.PATTERNS['likes']:
129
- if match := re.search(pattern, comment_text):
130
- likes = int(match.group(1))
131
- break
132
-
133
- return username, comment, likes, weeks
134
 
 
 
 
135
  except Exception as e:
136
- logger.error(f"Error extracting comment data: {e}")
137
- return None, None, 0, 0
138
 
139
  class StatsCalculator:
140
- """Класс для расчета статистики"""
 
141
  @staticmethod
142
- def calculate_period_stats(weeks: List[float], likes: List[str], sentiments: List[str]) -> Dict:
143
- if not weeks:
 
144
  return {}
145
 
146
- earliest_week = max(weeks)
147
- latest_week = min(weeks)
148
- week_range = earliest_week - latest_week
 
 
 
149
 
150
- period_length = week_range / 3 if week_range > 0 else 1
151
- engagement_periods = {
152
  'early': [],
153
  'middle': [],
154
  'late': []
155
  }
156
 
157
- for i, week in enumerate(weeks):
158
- if week >= earliest_week - period_length:
159
- engagement_periods['early'].append(i)
160
- elif week >= earliest_week - 2 * period_length:
161
- engagement_periods['middle'].append(i)
162
  else:
163
- engagement_periods['late'].append(i)
164
 
 
165
  return {
166
  period: {
167
- 'comments': len(indices),
168
- 'avg_likes': sum(int(likes[i]) for i in indices) / len(indices) if indices else 0,
169
- 'sentiment_ratio': sum(1 for i in indices if sentiments[i] == 'positive') / len(indices) if indices else 0
 
 
 
170
  }
171
- for period, indices in engagement_periods.items()
172
  }
173
 
174
- def analyze_post(content_type: str, link_to_post: str, post_likes: int, post_date: str,
175
- description: str, comment_count: int, all_comments: str) -> Tuple[str, str, str, str, str]:
176
- """Основная функция анализа поста"""
 
 
 
 
 
 
 
177
  try:
178
- # Разделение на комментарии
179
- comment_patterns = '|'.join([
180
- r"(?=Фото профиля)",
181
- r"(?=\n\s*[a-zA-Z0-9._]+\s+[^\n]+\n)",
182
- r"(?=^[a-zA-Z0-9._]+\s+[^\n]+\n)",
183
- r"(?=@[a-zA-Z0-9._]+\s+[^\n]+\n)"
184
- ])
185
- comments_blocks = [block.strip() for block in re.split(comment_patterns, all_comments)
186
- if block and block.strip() and 'Скрыто алгоритмами Instagram' not in block]
187
 
188
- # Извлечение данных
189
- data = [CommentExtractor.extract_data(block) for block in comments_blocks]
190
- valid_data = [(u, c, l, w) for u, c, l, w in data if all((u, c))]
 
 
191
 
192
- if not valid_data:
193
- return "No comments found", "", "", "", "0"
194
-
195
- usernames, comments, likes, weeks = zip(*valid_data)
196
- likes = [str(l) for l in likes]
197
 
198
- # Анализ комментариев
199
- comment_stats = {
200
- 'lengths': [len(c) for c in comments],
201
- 'words': [len(TextAnalyzer.get_words(c)) for c in comments],
202
- 'emojis': sum(TextAnalyzer.count_emojis(c) for c in comments),
203
- 'mentions': [m for c in comments for m in TextAnalyzer.extract_mentions(c)],
204
- 'sentiments': [SentimentAnalyzer.analyze(c) for c in comments]
205
- }
206
-
207
- # Расчет базовой статистики
208
  basic_stats = {
209
- 'total_comments': len(comments),
210
- 'avg_length': statistics.mean(comment_stats['lengths']),
211
- 'median_length': statistics.median(comment_stats['lengths']),
212
- 'avg_words': statistics.mean(comment_stats['words']),
213
- 'total_likes': sum(map(int, likes)),
214
- 'avg_likes': statistics.mean(map(int, likes))
215
  }
216
 
217
- # Расчет периодов
218
- period_stats = StatsCalculator.calculate_period_stats(weeks, likes, comment_stats['sentiments'])
219
-
220
- # Создание отчета
221
- csv_data = create_csv_report(content_type, link_to_post, post_likes, basic_stats,
222
- comment_stats, period_stats, usernames, comment_stats['mentions'])
223
-
224
- analytics_summary = create_text_report(basic_stats, comment_stats, period_stats, csv_data)
225
 
226
  return (
227
- analytics_summary,
228
- "\n".join(usernames),
229
- "\n".join(comments),
230
- "\n".join(likes),
231
  str(basic_stats['total_likes'])
232
  )
233
 
234
  except Exception as e:
235
- logger.error(f"Error in analyze_post: {e}", exc_info=True)
236
- return f"Error: {str(e)}", "", "", "", "0"
237
 
238
- def create_csv_report(content_type, link, post_likes, basic_stats, comment_stats, period_stats, usernames, mentions):
239
- """Создание CSV отчета"""
240
- csv_data = {
241
- 'metadata': {
242
- 'content_type': content_type,
243
- 'link': link,
244
- 'post_likes': post_likes
245
- },
246
- 'basic_stats': basic_stats,
247
- 'sentiment_stats': dict(Counter(comment_stats['sentiments'])),
248
- 'period_analysis': period_stats,
249
- 'top_users': dict(Counter(usernames).most_common(5)),
250
- 'top_mentioned': dict(Counter(mentions).most_common(5))
251
- }
 
 
 
252
 
253
- output = StringIO()
254
- writer = csv.writer(output)
255
- for section, data in csv_data.items():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  writer.writerow([section])
257
  for key, value in data.items():
258
  writer.writerow([key, value])
259
  writer.writerow([])
260
- return output.getvalue()
261
-
262
- def create_text_report(basic_stats, comment_stats, period_stats, csv_data):
263
- """Создание текстового отчета"""
264
- sentiment_dist = Counter(comment_stats['sentiments'])
265
- return (
266
- f"CSV DATA:\n{csv_data}\n\n"
267
- f"СТАТИСТИКА:\n"
268
- f"- Всего комментариев: {basic_stats['total_comments']}\n"
269
- f"- Среднее лайков: {basic_stats['avg_likes']:.1f}\n"
270
- f"АНАЛИЗ КОНТЕНТА:\n"
271
- f"- Средняя длина: {basic_stats['avg_length']:.1f}\n"
272
- f"- Медиана длины: {basic_stats['median_length']}\n"
273
- f"- Среднее слов: {basic_stats['avg_words']:.1f}\n"
274
- f"- Эмодзи: {comment_stats['emojis']}\n"
275
- f"ТОНАЛЬНОСТЬ:\n"
276
- f"- Позитив: {sentiment_dist['positive']}\n"
277
- f"- Нейтрально: {sentiment_dist['neutral']}\n"
278
- f"- Негатив: {sentiment_dist['negative']}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  )
 
 
 
 
 
280
 
281
- # Создание интерфейса Gradio
282
  import gradio as gr
283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  iface = gr.Interface(
285
- fn=analyze_post,
286
  inputs=[
287
- gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
288
- gr.Textbox(label="Link to Post"),
289
- gr.Number(label="Likes", value=0),
290
- gr.Textbox(label="Post Date"),
291
- gr.Textbox(label="Description", lines=3),
292
- gr.Number(label="Total Comment Count", value=0),
293
- gr.Textbox(label="All Comments", lines=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  ],
295
  outputs=[
296
- gr.Textbox(label="Analytics Summary", lines=20),
297
- gr.Textbox(label="Usernames"),
298
- gr.Textbox(label="Comments"),
299
- gr.Textbox(label="Likes Chronology"),
300
- gr.Textbox(label="Total Likes on Comments")
 
 
 
 
 
 
 
 
 
 
 
301
  ],
302
  title="Enhanced Instagram Comment Analyzer",
303
- description="Анализатор комментариев Instagram с расширенной аналитикой"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  )
305
 
306
  if __name__ == "__main__":
307
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import csv
import logging
import re
import statistics
from collections import Counter
from dataclasses import dataclass, asdict
from datetime import datetime
from enum import Enum
from io import StringIO  # required by generate_reports(); was missing from this module's imports
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Set, Union

import emoji
import numpy as np
13
 
14
# Logging setup: every run appends to a per-day file under ./logs and
# mirrors records to the console.
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_dir / f'analyzer_{datetime.now():%Y%m%d}.log'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
26
 
27
class Sentiment(str, Enum):
    """Five-level sentiment scale.

    Subclassing ``str`` means members compare equal to (and serialize as)
    their plain string values.
    """

    POSITIVE = 'positive'
    SLIGHTLY_POSITIVE = 'slightly_positive'
    NEUTRAL = 'neutral'
    SLIGHTLY_NEGATIVE = 'slightly_negative'
    NEGATIVE = 'negative'
33
+
34
@dataclass
class CommentData:
    """One parsed Instagram comment, as produced by CommentExtractor."""

    username: str          # author handle / display name
    text: str              # cleaned comment body (service metadata stripped)
    likes: int             # like count parsed from the raw comment block
    weeks_ago: float       # comment age in weeks (hours are converted upstream)
    sentiment: Sentiment   # classification from SentimentAnalyzer.analyze
41
+
42
class TextAnalyzer:
    """Stateless text-analysis helpers shared by the comment pipeline."""

    @staticmethod
    def clean_text(text: str) -> str:
        """Collapse every whitespace run to a single space and trim the ends."""
        return ' '.join(text.split())

    @staticmethod
    def count_emojis(text: str) -> int:
        """Return the number of emoji characters in *text*.

        Counts every occurrence: the previous set-comprehension version
        deduplicated, so "🔥🔥" was counted as 1 even though the method is
        named (and used as) a count.
        """
        return sum(1 for c in text if c in emoji.EMOJI_DATA)

    @staticmethod
    def extract_mentions(text: str) -> Set[str]:
        """Return the set of unique @-mentions (the '@' is kept in the handle)."""
        return set(re.findall(r'@[\w.]+', text))

    @staticmethod
    def get_words(text: str) -> List[str]:
        """Lower-cased words of 3+ characters, in order of appearance."""
        # The wrapping list comprehension added nothing — findall already
        # returns a list.
        return re.findall(r'\b\w{3,}\b', text.lower())
64
 
65
class SentimentAnalyzer:
    """Lexicon-based sentiment scoring with a five-level output scale."""

    # Indicator lexicons (emoji plus Russian/English keywords). Sets give O(1)
    # membership, but matching below is substring-based so the multi-codepoint
    # emoji entries are still found.
    INDICATORS = {
        'positive': {
            '🔥', '❤️', '👍', '😊', '💪', '👏', '🎉', '♥️', '😍', '🙏',
            'круто', 'супер', 'класс', 'огонь', 'пушка', 'отлично', 'здорово',
            'прекрасно', 'молодец', 'красота', 'спасибо', 'топ', 'лучший',
            'amazing', 'wonderful', 'great', 'perfect', 'love', 'beautiful'
        },
        'negative': {
            '👎', '😢', '😞', '😠', '😡', '💔', '😕', '😑',
            'плохо', 'ужас', 'отстой', 'фу', 'жесть', 'ужасно',
            'разочарован', 'печаль', 'грустно', 'bad', 'worst',
            'terrible', 'awful', 'sad', 'disappointed'
        }
    }

    @classmethod
    def analyze(cls, text: str) -> Sentiment:
        """Classify *text* on the five-level Sentiment scale.

        Each indicator found in the lowered text counts once; '!'/'?' amplify
        the dominant polarity (capped). The positive share of the combined
        score selects the band.
        """
        text_lower = text.lower()

        # Substring scan so BOTH word and emoji indicators can match.
        # Fixes two bugs in the previous version: `cls.TextAnalyzer` raised
        # AttributeError (TextAnalyzer is a module-level class, not an
        # attribute of this one), and intersecting a word set meant the emoji
        # indicators could never match at all.
        pos_count = sum(1 for ind in cls.INDICATORS['positive'] if ind in text_lower)
        neg_count = sum(1 for ind in cls.INDICATORS['negative'] if ind in text_lower)

        # Emphasis multiplier from punctuation, capped at doubling the score.
        emphasis = min(text.count('!') * 0.2 + text.count('?') * 0.1, 1.0)

        # Amplify only the dominant polarity.
        if pos_count > neg_count:
            pos_count *= 1 + emphasis
        elif neg_count > pos_count:
            neg_count *= 1 + emphasis

        total = pos_count + neg_count
        if total == 0:
            return Sentiment.NEUTRAL

        ratio = pos_count / total
        if ratio > 0.8:
            return Sentiment.POSITIVE
        if ratio > 0.6:
            return Sentiment.SLIGHTLY_POSITIVE
        if ratio < 0.2:
            return Sentiment.NEGATIVE
        if ratio < 0.4:
            return Sentiment.SLIGHTLY_NEGATIVE
        return Sentiment.NEUTRAL
119
 
120
class CommentExtractor:
    """Parses one raw Instagram comment block into a CommentData record."""

    class ParseError(Exception):
        """Raised internally when a comment block cannot be parsed."""
        pass

    # Pre-compiled patterns with named groups; VERBOSE mode for readability.
    PATTERNS = {
        'username': re.compile(r"""
            (?:
                Фото\sпрофиля\s(?P<name1>[^\n]+)|
                ^(?P<name2>[^\s]+)\s+|
                @(?P<name3>[^\s]+)\s+
            )
        """, re.VERBOSE),

        'time': re.compile(r"""
            (?P<value>\d+)\s*
            (?P<unit>(?:ч|нед|h|w|час|hour|week))\.?
        """, re.VERBOSE),

        'likes': re.compile(r"""
            (?:
                (?P<count1>\d+)\s*отметк[аи]\s\"Нравится\"|
                Нравится:\s*(?P<count2>\d+)|
                \"Нравится\":\s*(?P<count3>\d+)|
                likes?:\s*(?P<count4>\d+)
            )
        """, re.VERBOSE),

        'metadata': re.compile(r"""
            Фото\sпрофиля[^\n]+\n|
            \d+\s*(?:ч|нед|h|w|час|hour|week)\.?|
            (?:Нравится|likes?):\s*\d+|
            \d+\s*отметк[аи]\s\"Нравится\"|
            Ответить|
            Показать\sперевод|
            Скрыть\sвсе\sответы|
            Смотреть\sвсе\sответы\s\(\d+\)
        """, re.VERBOSE)
    }

    @classmethod
    def extract_data(cls, comment_text: str) -> Optional[CommentData]:
        """Extract username, cleaned text, likes, age and sentiment.

        Returns None (after logging) when the block cannot be parsed.
        """
        try:
            # Username: first alternative of the pattern that matched.
            username_match = cls.PATTERNS['username'].search(comment_text)
            if not username_match:
                raise cls.ParseError("Could not extract username")

            username = next(
                name for name in username_match.groups()
                if name is not None
            ).strip()

            # Strip service metadata, then normalize whitespace.
            comment = cls.PATTERNS['metadata'].sub('', comment_text)
            comment = TextAnalyzer.clean_text(comment)

            # Fix: remove the leading author handle from the cleaned text.
            # The pre-rewrite implementation stripped the username out of the
            # comment; the regex rewrite lost that step, so for blocks not in
            # the "Фото профиля ..." format the handle stayed glued to the
            # text. A prefix strip avoids the old (unescaped) re.sub hazard.
            if username and comment.startswith(username):
                comment = comment[len(username):].lstrip()

            # Age: explicit week units are taken as-is; hour-like units are
            # converted to weeks (24 h * 7 d).
            time_match = cls.PATTERNS['time'].search(comment_text)
            if not time_match:
                weeks = 0
            else:
                value = int(time_match.group('value'))
                unit = time_match.group('unit')
                weeks = value if unit in {'нед', 'w', 'week'} else value / (24 * 7)

            # Likes: first non-empty alternative group, default 0.
            likes_match = cls.PATTERNS['likes'].search(comment_text)
            likes = next(
                (int(count) for count in likes_match.groups() if count),
                0
            ) if likes_match else 0

            sentiment = SentimentAnalyzer.analyze(comment)

            return CommentData(
                username=username,
                text=comment,
                likes=likes,
                weeks_ago=weeks,
                sentiment=sentiment
            )

        except cls.ParseError as e:
            logger.warning(f"Failed to parse comment: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected error parsing comment: {e}", exc_info=True)
            return None
214
 
215
class StatsCalculator:
    """Aggregate statistics over parsed comments."""

    @staticmethod
    def calculate_period_stats(comments: List[CommentData]) -> Dict:
        """Split comments into early/middle/late thirds by age quantiles.

        Reports, per period: comment count, average likes, and the share of
        (slightly-)positive comments. Returns {} for an empty input.

        'early' holds the OLDEST comments (largest ``weeks_ago``) and 'late'
        the newest — ``weeks_ago`` measures age, so the previous comparison
        direction (smallest age → 'early') had the periods inverted.
        """
        if not comments:
            return {}

        # Quantile boundaries split the age distribution into rough thirds.
        ages = [c.weeks_ago for c in comments]
        lower, upper = np.quantile(ages, [0.33, 0.67])

        buckets = {'early': [], 'middle': [], 'late': []}
        for comment in comments:
            # Larger weeks_ago == older comment == earlier period.
            if comment.weeks_ago >= upper:
                buckets['early'].append(comment)
            elif comment.weeks_ago >= lower:
                buckets['middle'].append(comment)
            else:
                buckets['late'].append(comment)

        positive = {Sentiment.POSITIVE, Sentiment.SLIGHTLY_POSITIVE}
        return {
            period: {
                'comments': len(bucket),
                'avg_likes': statistics.mean(c.likes for c in bucket) if bucket else 0,
                'sentiment_ratio': (
                    sum(1 for c in bucket if c.sentiment in positive) / len(bucket)
                    if bucket else 0
                ),
            }
            for period, bucket in buckets.items()
        }
258
 
259
def analyze_post(
    content_type: str,
    link_to_post: str,
    post_likes: int,
    post_date: str,
    description: str,
    comment_count: int,
    all_comments: str
) -> Tuple[str, str, str, str, str]:
    """Analyze a post's raw comment dump and return the five UI outputs.

    Returns (analytics text, usernames, cleaned comments, per-comment likes,
    total comment likes) — all newline-joined strings.
    """
    try:
        # Lookaheads mark each comment boundary without consuming the
        # delimiter text, so re.split keeps every block intact.
        comment_pattern = re.compile(
            r'(?=Фото профиля|\n\s*[a-zA-Z0-9._]+\s+|\b@[a-zA-Z0-9._]+\s+)',
            re.MULTILINE
        )
        comments_blocks = [
            chunk.strip() for chunk in comment_pattern.split(all_comments)
            if chunk and chunk.strip() and 'Скрыто алгоритмами Instagram' not in chunk
        ]

        # extract_data returns None for unparseable blocks — drop those.
        comments_data = [
            parsed for block in comments_blocks
            if (parsed := CommentExtractor.extract_data(block)) is not None
        ]

        if not comments_data:
            logger.warning("No valid comments found in the input")
            return "No valid comments found", "", "", "", "0"

        # Headline statistics over the parsed comments.
        basic_stats = {
            'total_comments': len(comments_data),
            'avg_length': statistics.mean(len(c.text) for c in comments_data),
            'median_length': statistics.median(len(c.text) for c in comments_data),
            'avg_words': statistics.mean(
                len(TextAnalyzer.get_words(c.text)) for c in comments_data
            ),
            'total_likes': sum(c.likes for c in comments_data),
            'avg_likes': statistics.mean(c.likes for c in comments_data)
        }

        reports = generate_reports(
            content_type=content_type,
            link_to_post=link_to_post,
            post_likes=post_likes,
            comments_data=comments_data,
            basic_stats=basic_stats
        )

        return (
            reports['analytics'],
            "\n".join(c.username for c in comments_data),
            "\n".join(c.text for c in comments_data),
            "\n".join(str(c.likes) for c in comments_data),
            str(basic_stats['total_likes'])
        )

    except Exception as e:
        logger.error(f"Error analyzing post: {e}", exc_info=True)
        return f"Error analyzing post: {str(e)}", "", "", "", "0"
320
 
321
def generate_reports(
    content_type: str,
    link_to_post: str,
    post_likes: int,
    comments_data: List[CommentData],
    basic_stats: Dict
) -> Dict[str, str]:
    """Generate CSV and plain-text reports for a set of parsed comments.

    Returns {'csv': <csv text>, 'analytics': <human-readable report>}.
    Assumes comments_data is non-empty (the caller guards this); with an
    empty list the period lookups below would raise KeyError.
    """
    # Derived statistics.
    sentiment_dist = Counter(c.sentiment for c in comments_data)
    period_stats = StatsCalculator.calculate_period_stats(comments_data)
    top_users = Counter(c.username for c in comments_data).most_common(5)
    top_mentioned = Counter(
        mention for c in comments_data
        for mention in TextAnalyzer.extract_mentions(c.text)
    ).most_common(5)

    # CSV report (requires the module-level `from io import StringIO`).
    csv_output = StringIO()
    writer = csv.writer(csv_output)

    # Metadata header.
    writer.writerow(['Content Analysis Report'])
    writer.writerow(['Generated', datetime.now().isoformat()])
    writer.writerow(['Content Type', content_type])
    writer.writerow(['Post URL', link_to_post])
    writer.writerow(['Post Likes', post_likes])
    writer.writerow([])

    # One section per statistics table.
    for section, data in {
        'Basic Statistics': basic_stats,
        'Sentiment Distribution': sentiment_dist,
        'Period Analysis': period_stats,
        'Top Users': dict(top_users),
        'Top Mentioned': dict(top_mentioned)
    }.items():
        writer.writerow([section])
        for key, value in data.items():
            writer.writerow([key, value])
        writer.writerow([])

    # Text report.
    text_report = (
        f"ANALYSIS REPORT\n"
        f"Generated: {datetime.now():%Y-%m-%d %H:%M:%S}\n\n"
        f"BASIC STATISTICS:\n"
        f"- Total Comments: {basic_stats['total_comments']}\n"
        f"- Average Likes: {basic_stats['avg_likes']:.1f}\n"
        f"- Average Length: {basic_stats['avg_length']:.1f} characters\n"
        f"- Median Length: {basic_stats['median_length']}\n"
        f"- Average Words: {basic_stats['avg_words']:.1f}\n\n"
        f"SENTIMENT ANALYSIS:\n"
        f"- Positive: {sentiment_dist[Sentiment.POSITIVE]}\n"
        f"- Slightly Positive: {sentiment_dist[Sentiment.SLIGHTLY_POSITIVE]}\n"
        f"- Neutral: {sentiment_dist[Sentiment.NEUTRAL]}\n"
        f"- Slightly Negative: {sentiment_dist[Sentiment.SLIGHTLY_NEGATIVE]}\n"
        f"- Negative: {sentiment_dist[Sentiment.NEGATIVE]}\n\n"
        f"TOP CONTRIBUTORS:\n" +
        "\n".join(f"- {user}: {count} comments" for user, count in top_users) +
        # BUG FIX: the original had no '+' between the heading literal and
        # the join, so implicit string concatenation made
        # "\n\nMOST MENTIONED:\n\n" the join SEPARATOR and the heading never
        # rendered. Concatenate explicitly instead.
        "\n\nMOST MENTIONED:\n" +
        "\n".join(f"- {user}: {count} mentions" for user, count in top_mentioned) +
        f"\n\nENGAGEMENT PERIODS:\n"
        f"Early Period:\n"
        f"- Comments: {period_stats['early']['comments']}\n"
        f"- Avg Likes: {period_stats['early']['avg_likes']:.1f}\n"
        f"- Positive Sentiment: {period_stats['early']['sentiment_ratio']*100:.1f}%\n\n"
        f"Middle Period:\n"
        f"- Comments: {period_stats['middle']['comments']}\n"
        f"- Avg Likes: {period_stats['middle']['avg_likes']:.1f}\n"
        f"- Positive Sentiment: {period_stats['middle']['sentiment_ratio']*100:.1f}%\n\n"
        f"Late Period:\n"
        f"- Comments: {period_stats['late']['comments']}\n"
        f"- Avg Likes: {period_stats['late']['avg_likes']:.1f}\n"
        f"- Positive Sentiment: {period_stats['late']['sentiment_ratio']*100:.1f}%\n"
    )

    return {
        'csv': csv_output.getvalue(),
        'analytics': text_report
    }
402
 
403
+ # Gradio interface with improved input validation and error handling
404
  import gradio as gr
405
 
406
def validate_input(content_type: str, link: str, likes: int, date: str,
                   description: str, comment_count: int, comments: str) -> Tuple[bool, str]:
    """Validate UI input parameters before processing.

    Returns (True, "") when the input is usable, otherwise (False, reason).
    """
    # gr.Number delivers None when the field is cleared; normalize to 0 so
    # the `< 0` comparisons below cannot raise TypeError.
    likes = 0 if likes is None else likes
    comment_count = 0 if comment_count is None else comment_count

    if not link:
        return False, "Post link is required"
    if likes < 0:
        return False, "Likes count cannot be negative"
    if comment_count < 0:
        return False, "Comment count cannot be negative"
    if not comments.strip():
        return False, "Comments text is required"
    return True, ""
418
+
419
def wrapped_analyze_post(*args):
    """Validate inputs, then delegate to analyze_post; never raise to the UI."""
    ok, reason = validate_input(*args)
    if not ok:
        return reason, "", "", "", "0"

    try:
        return analyze_post(*args)
    except Exception as e:
        logger.error(f"Error in analyze_post wrapper: {e}", exc_info=True)
        return f"An error occurred: {str(e)}", "", "", "", "0"
430
+
431
+ # Create enhanced Gradio interface
432
# Gradio UI definition: the seven inputs mirror analyze_post's positional
# parameters in order; the five outputs mirror its 5-tuple return value.
iface = gr.Interface(
    fn=wrapped_analyze_post,  # validated wrapper, not analyze_post directly
    inputs=[
        gr.Radio(
            choices=["Photo", "Video", "Reel", "Story"],
            label="Content Type",
            value="Photo"
        ),
        gr.Textbox(
            label="Link to Post",
            placeholder="https://instagram.com/p/..."
        ),
        gr.Number(
            label="Post Likes",
            value=0,
            minimum=0
        ),
        gr.Textbox(
            label="Post Date",
            placeholder="YYYY-MM-DD"
        ),
        gr.Textbox(
            label="Post Description",
            lines=3,
            placeholder="Enter post description..."
        ),
        gr.Number(
            label="Total Comment Count",
            value=0,
            minimum=0
        ),
        gr.Textbox(
            label="Comments",
            lines=10,
            placeholder="Paste comments here..."
        )
    ],
    outputs=[
        gr.Textbox(
            label="Analytics Summary",
            lines=20
        ),
        gr.Textbox(
            label="Extracted Usernames"
        ),
        gr.Textbox(
            label="Cleaned Comments"
        ),
        gr.Textbox(
            label="Comment Likes Timeline"
        ),
        gr.Textbox(
            label="Total Comment Likes"
        )
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="""
    Analyze Instagram comments with advanced metrics including:
    - Sentiment analysis with granular classification
    - Temporal engagement patterns
    - User interaction statistics
    - Content quality metrics
    """,
    article="""
    ### Usage Instructions
    1. Select the content type (Photo, Video, Reel, or Story)
    2. Paste the post URL
    3. Enter the post metadata (likes, date, description)
    4. Paste the comments text
    5. Click submit to generate analysis

    ### Analysis Features
    - Multi-level sentiment analysis
    - Engagement period breakdown
    - Top contributors and mentions
    - Detailed statistical metrics

    ### Notes
    - All text fields support Unicode characters including emojis
    - Time references are converted to a standardized format
    - Analysis includes both quantitative and qualitative metrics
    """
)
515
 
516
if __name__ == "__main__":
    # Entry point: log startup, then run the Gradio server in the foreground.
    logger.info("Starting Instagram Comment Analyzer")

    # NOTE(review): `enable_queue` was removed from launch() in Gradio 4.x
    # (queuing is configured via `iface.queue()` instead) — confirm the
    # pinned gradio version supports this keyword.
    try:
        # Launch the interface with enhanced settings
        iface.launch(
            server_name="0.0.0.0",  # Allow external access
            server_port=7860,  # Default Gradio port
            share=False,  # Disable public URL generation
            debug=False,  # Disable debug mode in production
            enable_queue=True,  # Enable request queuing
            max_threads=4  # Limit concurrent processing
        )
    except Exception as e:
        # Startup failures are logged with a traceback, then re-raised so the
        # process exits non-zero.
        logger.error(f"Failed to start application: {e}", exc_info=True)
        raise