boompack committed on
Commit
8a11e5e
·
verified ·
1 Parent(s): 978ab36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -91
app.py CHANGED
@@ -7,8 +7,7 @@ import logging
7
  from typing import Tuple, List, Optional
8
  import statistics
9
  import csv
10
- from textblob import TextBlob
11
- import numpy as np
12
 
13
  # Настройка логирования
14
  logging.basicConfig(level=logging.INFO)
@@ -35,7 +34,7 @@ def analyze_sentiment(text):
35
  """Расширенный анализ тональности по эмодзи и ключевым словам"""
36
  positive_indicators = ['🔥', '❤️', '👍', '😊', '💪', '👏', '🎉', '♥️', '😍', '🙏',
37
  'круто', 'супер', 'класс', 'огонь', 'пушка', 'отлично', 'здорово',
38
- 'прекрасно', 'молодец', 'красота', 'спасибо', 'топ']
39
  negative_indicators = ['👎', '😢', '😞', '😠', '😡', '💔', '😕', '😑',
40
  'плохо', 'ужас', 'отстой', 'фу', 'жесть', 'ужасно',
41
  'разочарован', 'печаль', 'грустно']
@@ -48,38 +47,26 @@ def analyze_sentiment(text):
48
  positive_count += exclamation_count * 0.5 if positive_count > negative_count else 0
49
  negative_count += exclamation_count * 0.5 if negative_count > positive_count else 0
50
 
51
- # Добавляем анализ через TextBlob для более точной оценки
52
- blob = TextBlob(text)
53
- sentiment_score = blob.sentiment.polarity
54
-
55
- # Комбинируем оба подхода
56
- final_score = (positive_count - negative_count) + sentiment_score
57
-
58
- if final_score > 0:
59
  return 'positive'
60
- elif final_score < 0:
61
  return 'negative'
62
  return 'neutral'
63
 
64
  def extract_comment_data(comment_text):
65
- """
66
- Извлекает данные из отдельного комментария
67
- """
68
  try:
69
- # Проверка на скрытый комментарий
70
  if 'Скрыто алгоритмами Instagram' in comment_text:
71
  username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
72
  if username_match:
73
  return username_match.group(1).strip(), "", 0, 0
74
-
75
- # Извлекаем имя пользователя
76
  username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
77
  if not username_match:
78
  return None, None, 0, 0
79
 
80
  username = username_match.group(1).strip()
81
 
82
- # Улучшенное извлечение текста комментария
83
  comment_pattern = fr"{re.escape(username)}\n(.*?)(?:\d+ нед\.)"
84
  comment_match = re.search(comment_pattern, comment_text, re.DOTALL)
85
  if comment_match:
@@ -89,16 +76,13 @@ def extract_comment_data(comment_text):
89
  else:
90
  comment = ""
91
 
92
- # Извлекаем количество недель
93
  week_match = re.search(r'(\d+) нед\.', comment_text)
94
  weeks = int(week_match.group(1)) if week_match else 0
95
 
96
- # Извлекаем лайки с улучшенным поиском
97
  likes = 0
98
  likes_patterns = [
99
  r"(\d+) отметк[аи] \"Нравится\"",
100
  r"Нравится: (\d+)",
101
- r"\"Нравится\": (\d+)",
102
  ]
103
 
104
  for pattern in likes_patterns:
@@ -118,13 +102,14 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
118
  comments_blocks = re.split(r'(?=Фото профиля|Скрыто алгоритмами Instagram)', all_comments)
119
  comments_blocks = [block for block in comments_blocks if block.strip()]
120
 
121
- # Основные списки для данных
 
 
122
  usernames = []
123
  comments = []
124
  likes = []
125
  weeks = []
126
 
127
- # Дополнительные метрики
128
  total_emojis = 0
129
  mentions = []
130
  sentiments = []
@@ -132,11 +117,12 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
132
  words_per_comment = []
133
  all_words = []
134
  user_engagement = {}
135
- reply_chains = []
136
- current_chain = []
137
 
138
- # Обработка каждого комментария
139
  for block in comments_blocks:
 
 
 
140
  username, comment, like_count, week_number = extract_comment_data(block)
141
  if username and (comment is not None):
142
  usernames.append(username)
@@ -144,28 +130,16 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
144
  likes.append(str(like_count))
145
  weeks.append(week_number)
146
 
147
- # Базовые метрики
148
  total_emojis += count_emojis(comment)
149
- comment_mentions = extract_mentions(comment)
150
- mentions.extend(comment_mentions)
151
  sentiment = analyze_sentiment(comment)
152
  sentiments.append(sentiment)
153
  comment_lengths.append(len(comment))
154
 
155
- # Анализ цепочек ответов
156
- if comment_mentions:
157
- current_chain.append((username, comment_mentions[0]))
158
- else:
159
- if current_chain:
160
- reply_chains.append(current_chain)
161
- current_chain = []
162
-
163
- # Расширенные метрики
164
  words = get_comment_words(comment)
165
  words_per_comment.append(len(words))
166
  all_words.extend(words)
167
 
168
- # Статистика пользователя
169
  if username not in user_engagement:
170
  user_engagement[username] = {
171
  'comments': 0,
@@ -173,9 +147,7 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
173
  'emoji_usage': 0,
174
  'avg_length': 0,
175
  'sentiments': [],
176
- 'mentions_received': 0,
177
- 'mentions_made': len(comment_mentions),
178
- 'response_time': []
179
  }
180
  user_stats = user_engagement[username]
181
  user_stats['comments'] += 1
@@ -183,74 +155,141 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
183
  user_stats['emoji_usage'] += count_emojis(comment)
184
  user_stats['avg_length'] += len(comment)
185
  user_stats['sentiments'].append(sentiment)
 
186
 
187
- # Финализируем цепочки ответов
188
- if current_chain:
189
- reply_chains.append(current_chain)
190
-
191
- # Обновляем статистику пользователей
 
192
  for username in user_engagement:
193
  stats = user_engagement[username]
194
  stats['avg_length'] /= stats['comments']
195
  stats['engagement_rate'] = stats['total_likes'] / stats['comments']
196
  stats['sentiment_ratio'] = sum(1 for s in stats['sentiments'] if s == 'positive') / len(stats['sentiments'])
197
- stats['mentions_received'] = sum(1 for m in mentions if m == f'@{username}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  # Экспериментальная аналитика
200
- experimental_metrics = {
201
- 'conversation_depth': len(max(reply_chains, key=len)) if reply_chains else 0,
202
- 'avg_response_time': np.mean([c['avg_length'] for c in user_engagement.values()]),
203
- 'engagement_consistency': np.std([c['comments'] for c in user_engagement.values()]),
204
- 'user_interaction_score': len([c for c in comments if any(mention in c for mention in mentions)]) / len(comments),
205
- 'sentiment_volatility': np.std([1 if s == 'positive' else -1 if s == 'negative' else 0 for s in sentiments]),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  }
207
 
208
- # Форматируем данные для CSV
209
  csv_data = {
210
- 'post_url': link_to_post,
211
- 'total_comments': len(comments),
212
- 'total_likes': sum(map(int, likes)),
213
- 'avg_likes_per_comment': sum(map(int, likes)) / len(comments),
214
- 'unique_users': len(set(usernames)),
215
- 'emoji_rate': total_emojis / len(comments),
216
- 'avg_comment_length': sum(comment_lengths) / len(comments),
217
- 'positive_sentiment_ratio': sum(1 for s in sentiments if s == 'positive') / len(sentiments),
218
- 'mention_rate': len(mentions) / len(comments),
219
- 'conversation_depth': experimental_metrics['conversation_depth'],
220
- 'user_interaction_score': experimental_metrics['user_interaction_score'],
221
- 'sentiment_volatility': experimental_metrics['sentiment_volatility'],
 
 
 
 
 
 
 
 
 
 
 
 
222
  }
223
 
224
- # Форматируем вывод для CSV
225
- csv_output = ",".join([f"{k}:{v}" for k, v in csv_data.items()])
 
 
 
 
 
 
 
226
 
227
- # Форматируем детальную аналитику
228
  analytics_summary = (
229
- f"CSV_DATA\n{csv_output}\n\n"
230
- f"DETAILED_ANALYTICS\n"
231
- f"Content Type: {content_type}\n"
232
- f"Link to Post: {link_to_post}\n\n"
233
- f"BASIC_STATS\n"
234
- f"Total Comments: {len(comments)}\n"
235
- f"Total Likes: {sum(map(int, likes))}\n"
236
- f"Unique Users: {len(set(usernames))}\n"
237
- f"Activity Period: {max(weeks)}-{min(weeks)} weeks\n\n"
238
- f"CONTENT_ANALYSIS\n"
239
- f"Avg Comment Length: {sum(comment_lengths) / len(comments):.1f}\n"
240
- f"Total Emojis: {total_emojis}\n"
241
- f"Sentiment Distribution: {Counter(sentiments)}\n\n"
242
- f"EXPERIMENTAL_METRICS\n"
243
- f"Conversation Depth: {experimental_metrics['conversation_depth']}\n"
244
- f"User Interaction Score: {experimental_metrics['user_interaction_score']:.2f}\n"
245
- f"Sentiment Volatility: {experimental_metrics['sentiment_volatility']:.2f}\n"
246
- f"Engagement Consistency: {experimental_metrics['engagement_consistency']:.2f}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  )
248
 
249
- return analytics_summary, usernames_output, comments_output, likes_chronology_output, str(sum(map(int, likes)))
250
 
251
  except Exception as e:
252
  logger.error(f"Error in analyze_post: {e}", exc_info=True)
253
- return str(e), "", "", "", "0"
254
 
255
  # Создаем интерфейс Gradio
256
  iface = gr.Interface(
 
7
  from typing import Tuple, List, Optional
8
  import statistics
9
  import csv
10
+ from io import StringIO
 
11
 
12
  # Настройка логирования
13
  logging.basicConfig(level=logging.INFO)
 
34
  """Расширенный анализ тональности по эмодзи и ключевым словам"""
35
  positive_indicators = ['🔥', '❤️', '👍', '😊', '💪', '👏', '🎉', '♥️', '😍', '🙏',
36
  'круто', 'супер', 'класс', 'огонь', 'пушка', 'отлично', 'здорово',
37
+ 'прекрасно', 'молодец', 'красота', 'спасибо', 'топ', 'лучший']
38
  negative_indicators = ['👎', '😢', '😞', '😠', '😡', '💔', '😕', '😑',
39
  'плохо', 'ужас', 'отстой', 'фу', 'жесть', 'ужасно',
40
  'разочарован', 'печаль', 'грустно']
 
47
  positive_count += exclamation_count * 0.5 if positive_count > negative_count else 0
48
  negative_count += exclamation_count * 0.5 if negative_count > positive_count else 0
49
 
50
+ if positive_count > negative_count:
 
 
 
 
 
 
 
51
  return 'positive'
52
+ elif negative_count > positive_count:
53
  return 'negative'
54
  return 'neutral'
55
 
56
  def extract_comment_data(comment_text):
57
+ """Извлекает данные из отдельного комментария"""
 
 
58
  try:
 
59
  if 'Скрыто алгоритмами Instagram' in comment_text:
60
  username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
61
  if username_match:
62
  return username_match.group(1).strip(), "", 0, 0
63
+
 
64
  username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
65
  if not username_match:
66
  return None, None, 0, 0
67
 
68
  username = username_match.group(1).strip()
69
 
 
70
  comment_pattern = fr"{re.escape(username)}\n(.*?)(?:\d+ нед\.)"
71
  comment_match = re.search(comment_pattern, comment_text, re.DOTALL)
72
  if comment_match:
 
76
  else:
77
  comment = ""
78
 
 
79
  week_match = re.search(r'(\d+) нед\.', comment_text)
80
  weeks = int(week_match.group(1)) if week_match else 0
81
 
 
82
  likes = 0
83
  likes_patterns = [
84
  r"(\d+) отметк[аи] \"Нравится\"",
85
  r"Нравится: (\d+)",
 
86
  ]
87
 
88
  for pattern in likes_patterns:
 
102
  comments_blocks = re.split(r'(?=Фото профиля|Скрыто алгоритмами Instagram)', all_comments)
103
  comments_blocks = [block for block in comments_blocks if block.strip()]
104
 
105
+ # Подсчет скрытых комментариев
106
+ hidden_comments = len(re.findall(r'Скрыто алгоритмами Instagram', all_comments))
107
+
108
  usernames = []
109
  comments = []
110
  likes = []
111
  weeks = []
112
 
 
113
  total_emojis = 0
114
  mentions = []
115
  sentiments = []
 
117
  words_per_comment = []
118
  all_words = []
119
  user_engagement = {}
 
 
120
 
121
+ # Обработка комментариев
122
  for block in comments_blocks:
123
+ if 'Скрыто алгоритмами Instagram' in block:
124
+ continue
125
+
126
  username, comment, like_count, week_number = extract_comment_data(block)
127
  if username and (comment is not None):
128
  usernames.append(username)
 
130
  likes.append(str(like_count))
131
  weeks.append(week_number)
132
 
 
133
  total_emojis += count_emojis(comment)
134
+ mentions.extend(extract_mentions(comment))
 
135
  sentiment = analyze_sentiment(comment)
136
  sentiments.append(sentiment)
137
  comment_lengths.append(len(comment))
138
 
 
 
 
 
 
 
 
 
 
139
  words = get_comment_words(comment)
140
  words_per_comment.append(len(words))
141
  all_words.extend(words)
142
 
 
143
  if username not in user_engagement:
144
  user_engagement[username] = {
145
  'comments': 0,
 
147
  'emoji_usage': 0,
148
  'avg_length': 0,
149
  'sentiments': [],
150
+ 'weeks': [] # Добавлено для анализа временной активности
 
 
151
  }
152
  user_stats = user_engagement[username]
153
  user_stats['comments'] += 1
 
155
  user_stats['emoji_usage'] += count_emojis(comment)
156
  user_stats['avg_length'] += len(comment)
157
  user_stats['sentiments'].append(sentiment)
158
+ user_stats['weeks'].append(week_number)
159
 
160
+ # Проверка количества комментариев
161
+ total_comments = len(comments)
162
+ if total_comments != comment_count:
163
+ logger.warning(f"Found {total_comments} comments, but expected {comment_count}")
164
+
165
+ # Обновление статистики пользователей
166
  for username in user_engagement:
167
  stats = user_engagement[username]
168
  stats['avg_length'] /= stats['comments']
169
  stats['engagement_rate'] = stats['total_likes'] / stats['comments']
170
  stats['sentiment_ratio'] = sum(1 for s in stats['sentiments'] if s == 'positive') / len(stats['sentiments'])
171
+ stats['activity_period'] = max(stats['weeks']) - min(stats['weeks']) if stats['weeks'] else 0
172
+
173
+ # Расчет базовой статистики
174
+ avg_comment_length = sum(comment_lengths) / total_comments
175
+ sentiment_distribution = Counter(sentiments)
176
+ most_active_users = Counter(usernames).most_common(5)
177
+ most_mentioned = Counter(mentions).most_common(5)
178
+ avg_likes = sum(map(int, likes)) / len(likes) if likes else 0
179
+ earliest_week = max(weeks) if weeks else 0
180
+ latest_week = min(weeks) if weeks else 0
181
+
182
+ # Расширенная статистика
183
+ median_comment_length = statistics.median(comment_lengths)
184
+ avg_words_per_comment = sum(words_per_comment) / total_comments
185
+ common_words = Counter(all_words).most_common(10)
186
 
187
  # Экспериментальная аналитика
188
+ engagement_periods = {
189
+ 'early': [],
190
+ 'middle': [],
191
+ 'late': []
192
+ }
193
+ week_range = max(weeks) - min(weeks) if weeks else 0
194
+ period_length = week_range / 3 if week_range > 0 else 1
195
+
196
+ for i, week in enumerate(weeks):
197
+ if week >= max(weeks) - period_length:
198
+ engagement_periods['early'].append(i)
199
+ elif week >= max(weeks) - 2 * period_length:
200
+ engagement_periods['middle'].append(i)
201
+ else:
202
+ engagement_periods['late'].append(i)
203
+
204
+ period_stats = {
205
+ period: {
206
+ 'comments': len(indices),
207
+ 'avg_likes': sum(int(likes[i]) for i in indices) / len(indices) if indices else 0,
208
+ 'sentiment_ratio': sum(1 for i in indices if sentiments[i] == 'positive') / len(indices) if indices else 0
209
+ }
210
+ for period, indices in engagement_periods.items()
211
  }
212
 
213
+ # Подготовка данных для CSV
214
  csv_data = {
215
+ 'metadata': {
216
+ 'content_type': content_type,
217
+ 'link': link_to_post,
218
+ 'post_likes': post_likes,
219
+ 'post_date': post_date,
220
+ 'total_comments': total_comments,
221
+ 'expected_comments': comment_count,
222
+ 'hidden_comments': hidden_comments
223
+ },
224
+ 'basic_stats': {
225
+ 'avg_comment_length': avg_comment_length,
226
+ 'median_comment_length': median_comment_length,
227
+ 'avg_words': avg_words_per_comment,
228
+ 'total_emojis': total_emojis,
229
+ 'avg_likes': avg_likes
230
+ },
231
+ 'sentiment_stats': {
232
+ 'positive': sentiment_distribution['positive'],
233
+ 'neutral': sentiment_distribution['neutral'],
234
+ 'negative': sentiment_distribution['negative']
235
+ },
236
+ 'period_analysis': period_stats,
237
+ 'top_users': dict(most_active_users),
238
+ 'top_mentioned': dict(most_mentioned)
239
  }
240
 
241
+ # Создаем CSV строку
242
+ output = StringIO()
243
+ writer = csv.writer(output)
244
+ for section, data in csv_data.items():
245
+ writer.writerow([section])
246
+ for key, value in data.items():
247
+ writer.writerow([key, value])
248
+ writer.writerow([])
249
+ csv_output = output.getvalue()
250
 
251
+ # Формируем текстовый отчет
252
  analytics_summary = (
253
+ f"CSV DATA:\n{csv_output}\n\n"
254
+ f"ДЕТАЛЬНЫЙ АНАЛИЗ:\n"
255
+ f"Контент: {content_type}\n"
256
+ f"Ссылка: {link_to_post}\n\n"
257
+ f"СТАТИСТИКА:\n"
258
+ f"- Всего комментариев: {total_comments} (ожидалось: {comment_count})\n"
259
+ f"- Скрытых комментариев: {hidden_comments}\n"
260
+ f"- Всего лайков: {sum(map(int, likes))}\n"
261
+ f"- Среднее лайков: {avg_likes:.1f}\n"
262
+ f"- Период: {earliest_week}-{latest_week} недель\n\n"
263
+ f"АНАЛИЗ КОНТЕНТА:\n"
264
+ f"- Средняя длина: {avg_comment_length:.1f} символов\n"
265
+ f"- Медиана длины: {median_comment_length} символов\n"
266
+ f"- Среднее слов: {avg_words_per_comment:.1f}\n"
267
+ f"- Эмодзи: {total_emojis}\n"
268
+ f"- Тональность:\n"
269
+ f" * Позитив: {sentiment_distribution['positive']}\n"
270
+ f" * Нейтрально: {sentiment_distribution['neutral']}\n"
271
+ f" * Негатив: {sentiment_distribution['negative']}\n\n"
272
+ f"ПОПУЛЯРНЫЕ СЛОВА:\n"
273
+ + "\n".join([f"- {word}: {count}" for word, count in common_words]) + "\n\n"
274
+ f"АКТИВНЫЕ ПОЛЬЗОВАТЕЛИ:\n"
275
+ + "\n".join([f"- {user}: {count}" for user, count in most_active_users]) + "\n\n"
276
+ f"УПОМИНАНИЯ:\n"
277
+ + "\n".join([f"- {user}: {count}" for user, count in most_mentioned if user]) + "\n\n"
278
+ f"АНАЛИЗ ПО ПЕРИОДАМ:\n"
279
+ + "\n".join([f"- {period}: {stats['comments']} комментариев, {stats['avg_likes']:.1f} лайков/коммент, "
280
+ f"{stats['sentiment_ratio']*100:.1f}% позитивных"
281
+ for period, stats in period_stats.items()]) + "\n\n"
282
+ f"ЭКСПЕРИМЕНТАЛЬНАЯ АНАЛИТИКА:\n"
283
+ f"- Самый активный период: {max(period_stats.items(), key=lambda x: x[1]['comments'])[0]}\n"
284
+ f"- Наиболее позитивный период: {max(period_stats.items(), key=lambda x: x[1]['sentiment_ratio'])[0]}\n"
285
+ f"- Период с макс. вовлеченностью: {max(period_stats.items(), key=lambda x: x[1]['avg_likes'])[0]}"
286
  )
287
 
288
+ return analytics_summary, "\n".join(usernames), "\n".join(comments), "\n".join(likes), str(sum(map(int, likes)))
289
 
290
  except Exception as e:
291
  logger.error(f"Error in analyze_post: {e}", exc_info=True)
292
+ return f"Error: {str(e)}", "", "", "", "0"
293
 
294
  # Создаем интерфейс Gradio
295
  iface = gr.Interface(