boompack committed
Commit 978ab36 · verified · 1 Parent(s): 282dd48

Update app.py

Files changed (1):
  1. app.py +105 -121
app.py CHANGED
@@ -6,6 +6,9 @@ import emoji
 import logging
 from typing import Tuple, List, Optional
 import statistics
+import csv
+from textblob import TextBlob
+import numpy as np
 
 # Logging setup
 logging.basicConfig(level=logging.INFO)
@@ -25,9 +28,8 @@ def extract_mentions(text):
 
 def get_comment_words(text):
     """Gets a list of words from the comment for analysis"""
-    # Strip emoji from the text and convert it to lower case
     words = re.findall(r'\w+', text.lower())
-    return [w for w in words if len(w) > 2]  # Exclude short words
+    return [w for w in words if len(w) > 2]
 
 def analyze_sentiment(text):
     """Extended sentiment analysis based on emoji and keywords"""
@@ -42,23 +44,34 @@ def analyze_sentiment(text):
     positive_count = sum(1 for ind in positive_indicators if ind in text_lower)
     negative_count = sum(1 for ind in negative_indicators if ind in text_lower)
 
-    # Treat exclamation marks as emotion amplifiers
     exclamation_count = text.count('!')
     positive_count += exclamation_count * 0.5 if positive_count > negative_count else 0
     negative_count += exclamation_count * 0.5 if negative_count > positive_count else 0
 
-    if positive_count > negative_count:
+    # Add TextBlob analysis for a more precise score
+    blob = TextBlob(text)
+    sentiment_score = blob.sentiment.polarity
+
+    # Combine both approaches
+    final_score = (positive_count - negative_count) + sentiment_score
+
+    if final_score > 0:
         return 'positive'
-    elif negative_count > positive_count:
+    elif final_score < 0:
         return 'negative'
     return 'neutral'
 
 def extract_comment_data(comment_text):
     """
     Extracts data from a single comment
-    Returns (username, comment_text, likes_count, week_number)
     """
     try:
+        # Check for a hidden comment
+        if 'Скрыто алгоритмами Instagram' in comment_text:
+            username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
+            if username_match:
+                return username_match.group(1).strip(), "", 0, 0
+
         # Extract the username
         username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
         if not username_match:
@@ -66,12 +79,12 @@ def extract_comment_data(comment_text):
 
         username = username_match.group(1).strip()
 
-        # Extract the comment text
-        comment_pattern = fr"{username}\n(.*?)(?:\d+ нед\.)"
+        # Improved extraction of the comment text
+        comment_pattern = fr"{re.escape(username)}\n(.*?)(?:\d+ нед\.)"
        comment_match = re.search(comment_pattern, comment_text, re.DOTALL)
         if comment_match:
             comment = clean_text(comment_match.group(1))
-            comment = re.sub(fr'^{username}\s*', '', comment)
+            comment = re.sub(fr'^{re.escape(username)}\s*', '', comment)
             comment = re.sub(r'^@[\w\.]+ ', '', comment)
         else:
             comment = ""
@@ -80,11 +93,12 @@ def extract_comment_data(comment_text):
         week_match = re.search(r'(\d+) нед\.', comment_text)
         weeks = int(week_match.group(1)) if week_match else 0
 
-        # Look for the number of likes
+        # Extract likes with an improved search
         likes = 0
         likes_patterns = [
             r"(\d+) отметк[аи] \"Нравится\"",
             r"Нравится: (\d+)",
+            r"\"Нравится\": (\d+)",
         ]
 
         for pattern in likes_patterns:
@@ -100,8 +114,8 @@ def extract_comment_data(comment_text):
 
 def analyze_post(content_type, link_to_post, post_likes, post_date, description, comment_count, all_comments):
     try:
-        # Split the comments on "Фото профиля"
-        comments_blocks = re.split(r'(?=Фото профиля)', all_comments)
+        # Improved splitting of the comments
+        comments_blocks = re.split(r'(?=Фото профиля|Скрыто алгоритмами Instagram)', all_comments)
         comments_blocks = [block for block in comments_blocks if block.strip()]
 
         # Main lists for the data
@@ -117,12 +131,14 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
         comment_lengths = []
         words_per_comment = []
         all_words = []
-        user_engagement = {}  # Dictionary for per-user statistics
+        user_engagement = {}
+        reply_chains = []
+        current_chain = []
 
         # Process each comment
         for block in comments_blocks:
             username, comment, like_count, week_number = extract_comment_data(block)
-            if username and comment:
+            if username and (comment is not None):
                 usernames.append(username)
                 comments.append(comment)
                 likes.append(str(like_count))
@@ -130,11 +146,20 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
 
                 # Basic metrics
                 total_emojis += count_emojis(comment)
-                mentions.extend(extract_mentions(comment))
+                comment_mentions = extract_mentions(comment)
+                mentions.extend(comment_mentions)
                 sentiment = analyze_sentiment(comment)
                 sentiments.append(sentiment)
                 comment_lengths.append(len(comment))
 
+                # Track reply chains
+                if comment_mentions:
+                    current_chain.append((username, comment_mentions[0]))
+                else:
+                    if current_chain:
+                        reply_chains.append(current_chain)
+                    current_chain = []
+
                 # Extended metrics
                 words = get_comment_words(comment)
                 words_per_comment.append(len(words))
@@ -147,7 +172,10 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
                         'total_likes': 0,
                         'emoji_usage': 0,
                         'avg_length': 0,
-                        'sentiments': []
+                        'sentiments': [],
+                        'mentions_received': 0,
+                        'mentions_made': len(comment_mentions),
+                        'response_time': []
                     }
                 user_stats = user_engagement[username]
                 user_stats['comments'] += 1
@@ -156,139 +184,95 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
                 user_stats['avg_length'] += len(comment)
                 user_stats['sentiments'].append(sentiment)
 
-        # Analytics
-        total_comments = len(comments)
-        if total_comments == 0:
-            raise ValueError("No valid comments found")
-
+        # Finalize the reply chains
+        if current_chain:
+            reply_chains.append(current_chain)
+
         # Update the per-user statistics
         for username in user_engagement:
             stats = user_engagement[username]
             stats['avg_length'] /= stats['comments']
             stats['engagement_rate'] = stats['total_likes'] / stats['comments']
             stats['sentiment_ratio'] = sum(1 for s in stats['sentiments'] if s == 'positive') / len(stats['sentiments'])
+            stats['mentions_received'] = sum(1 for m in mentions if m == f'@{username}')
 
-        # Basic statistics
-        avg_comment_length = sum(comment_lengths) / total_comments
-        sentiment_distribution = Counter(sentiments)
-        most_active_users = Counter(usernames).most_common(5)
-        most_mentioned = Counter(mentions).most_common(5)
-        avg_likes = sum(map(int, likes)) / len(likes) if likes else 0
-        earliest_week = max(weeks) if weeks else 0
-        latest_week = min(weeks) if weeks else 0
-
-        # Extended statistics
-        median_comment_length = statistics.median(comment_lengths)
-        avg_words_per_comment = sum(words_per_comment) / total_comments
-        common_words = Counter(all_words).most_common(10)
-
-        # Engagement analysis
-        engagement_metrics = {
-            'comments_with_likes': sum(1 for l in likes if int(l) > 0),
-            'comments_with_emoji': sum(1 for c in comments if count_emojis(c) > 0),
-            'comments_with_mentions': sum(1 for c in comments if extract_mentions(c)),
-            'avg_engagement_rate': statistics.mean([
-                stats['engagement_rate'] for stats in user_engagement.values()
-            ])
+        # Experimental analytics
+        experimental_metrics = {
+            'conversation_depth': len(max(reply_chains, key=len)) if reply_chains else 0,
+            'avg_response_time': np.mean([c['avg_length'] for c in user_engagement.values()]),
+            'engagement_consistency': np.std([c['comments'] for c in user_engagement.values()]),
+            'user_interaction_score': len([c for c in comments if any(mention in c for mention in mentions)]) / len(comments),
+            'sentiment_volatility': np.std([1 if s == 'positive' else -1 if s == 'negative' else 0 for s in sentiments]),
         }
 
-        # Time-based analysis
-        week_distribution = Counter(weeks)
-        most_active_weeks = sorted(week_distribution.items(), key=lambda x: x[1], reverse=True)[:3]
+        # Format the data for CSV
+        csv_data = {
+            'post_url': link_to_post,
+            'total_comments': len(comments),
+            'total_likes': sum(map(int, likes)),
+            'avg_likes_per_comment': sum(map(int, likes)) / len(comments),
+            'unique_users': len(set(usernames)),
+            'emoji_rate': total_emojis / len(comments),
+            'avg_comment_length': sum(comment_lengths) / len(comments),
+            'positive_sentiment_ratio': sum(1 for s in sentiments if s == 'positive') / len(sentiments),
+            'mention_rate': len(mentions) / len(comments),
+            'conversation_depth': experimental_metrics['conversation_depth'],
+            'user_interaction_score': experimental_metrics['user_interaction_score'],
+            'sentiment_volatility': experimental_metrics['sentiment_volatility'],
+        }
 
-        # Build the output data
-        usernames_output = "\n".join(usernames)
-        comments_output = "\n".join(comments)
-        likes_chronology_output = "\n".join(likes)
-        total_likes_sum = sum(map(int, likes))
+        # Format the CSV output
+        csv_output = ",".join([f"{k}:{v}" for k, v in csv_data.items()])
 
-        # Extended analytics
+        # Format the detailed analytics
         analytics_summary = (
+            f"CSV_DATA\n{csv_output}\n\n"
+            f"DETAILED_ANALYTICS\n"
             f"Content Type: {content_type}\n"
             f"Link to Post: {link_to_post}\n\n"
-            f"ОСНОВНАЯ СТАТИСТИКА:\n"
-            f"- Всего комментариев: {total_comments}\n"
-            f"- Всего лайков на комментариях: {total_likes_sum}\n"
-            f"- Среднее количество лайков: {avg_likes:.1f}\n"
-            f"- Период активности: {earliest_week}-{latest_week} недель\n\n"
-            f"АНАЛИЗ КОНТЕНТА:\n"
-            f"- Средняя длина комментария: {avg_comment_length:.1f} символов\n"
-            f"- Медианная длина комментария: {median_comment_length} символов\n"
-            f"- Среднее количество слов: {avg_words_per_comment:.1f}\n"
-            f"- Всего эмодзи использовано: {total_emojis}\n"
-            f"- Тональность комментариев:\n"
-            f" * Позитивных: {sentiment_distribution['positive']}\n"
-            f" * Нейтральных: {sentiment_distribution['neutral']}\n"
-            f" * Негативных: {sentiment_distribution['negative']}\n\n"
-            f"ПОПУЛЯРНЫЕ СЛОВА:\n"
-            + "\n".join([f"- {word}: {count} раз" for word, count in common_words]) + "\n\n"
-            f"АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:\n"
-            f"Самые активные комментаторы:\n"
-            + "\n".join([f"- {user}: {count} комментариев" for user, count in most_active_users]) + "\n\n"
-            f"Самые упоминаемые пользователи:\n"
-            + "\n".join([f"- {user}: {count} упоминаний" for user, count in most_mentioned if user]) + "\n\n"
-            f"ВОВЛЕЧЕННОСТЬ:\n"
-            f"- Процент комментариев с лайками: {(engagement_metrics['comments_with_likes'] / total_comments * 100):.1f}%\n"
-            f"- Процент комментариев с эмодзи: {(engagement_metrics['comments_with_emoji'] / total_comments * 100):.1f}%\n"
-            f"- Процент комментариев с упоминаниями: {(engagement_metrics['comments_with_mentions'] / total_comments * 100):.1f}%\n"
-            f"- Средний рейтинг вовлеченности: {engagement_metrics['avg_engagement_rate']:.2f}\n\n"
-            f"ВРЕМЕННАЯ АКТИВНОСТЬ:\n"
-            f"Самые активные недели:\n"
-            + "\n".join([f"- {week} неделя: {count} комментариев" for week, count in most_active_weeks])
+            f"BASIC_STATS\n"
+            f"Total Comments: {len(comments)}\n"
+            f"Total Likes: {sum(map(int, likes))}\n"
+            f"Unique Users: {len(set(usernames))}\n"
+            f"Activity Period: {max(weeks)}-{min(weeks)} weeks\n\n"
+            f"CONTENT_ANALYSIS\n"
+            f"Avg Comment Length: {sum(comment_lengths) / len(comments):.1f}\n"
+            f"Total Emojis: {total_emojis}\n"
+            f"Sentiment Distribution: {Counter(sentiments)}\n\n"
+            f"EXPERIMENTAL_METRICS\n"
+            f"Conversation Depth: {experimental_metrics['conversation_depth']}\n"
+            f"User Interaction Score: {experimental_metrics['user_interaction_score']:.2f}\n"
+            f"Sentiment Volatility: {experimental_metrics['sentiment_volatility']:.2f}\n"
+            f"Engagement Consistency: {experimental_metrics['engagement_consistency']:.2f}\n"
         )
 
-        return analytics_summary, usernames_output, comments_output, likes_chronology_output, str(total_likes_sum)
+        return analytics_summary, usernames_output, comments_output, likes_chronology_output, str(sum(map(int, likes)))
 
     except Exception as e:
         logger.error(f"Error in analyze_post: {e}", exc_info=True)
-        error_message = f"Произошла ошибка при обработке: {str(e)}\n{str(type(e))}"
-        return error_message, error_message, error_message, error_message, "0"
+        return str(e), "", "", "", "0"
 
 # Create the Gradio interface
 iface = gr.Interface(
     fn=analyze_post,
     inputs=[
-        gr.Radio(
-            choices=["Photo", "Video"],
-            label="Content Type",
-            value="Photo"
-        ),
-        gr.Textbox(
-            label="Link to Post",
-            placeholder="Введите ссылку на пост"
-        ),
-        gr.Number(
-            label="Likes",
-            value=0
-        ),
-        gr.Textbox(
-            label="Post Date",
-            placeholder="Введите дату публикации"
-        ),
-        gr.Textbox(
-            label="Description",
-            placeholder="Введите описание поста",
-            lines=3
-        ),
-        gr.Number(
-            label="Total Comment Count",
-            value=0
-        ),
-        gr.Textbox(
-            label="All Comments",
-            placeholder="Вставьте комментарии",
-            lines=10
-        )
+        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
+        gr.Textbox(label="Link to Post"),
+        gr.Number(label="Likes", value=0),
+        gr.Textbox(label="Post Date"),
+        gr.Textbox(label="Description", lines=3),
+        gr.Number(label="Total Comment Count", value=0),
+        gr.Textbox(label="All Comments", lines=10)
     ],
     outputs=[
         gr.Textbox(label="Analytics Summary", lines=20),
-        gr.Textbox(label="Usernames (Output 1)", lines=5),
-        gr.Textbox(label="Comments (Output 2)", lines=5),
-        gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
-        gr.Textbox(label="Total Likes on Comments (Output 4)")
+        gr.Textbox(label="Usernames"),
+        gr.Textbox(label="Comments"),
+        gr.Textbox(label="Likes Chronology"),
+        gr.Textbox(label="Total Likes on Comments")
     ],
-    title="Instagram Comment Analyzer Pro",
-    description="Расширенный анализатор комментариев Instagram с детальной аналитикой"
+    title="Enhanced Instagram Comment Analyzer",
+    description="Анализатор комментариев Instagram с расширенной аналитикой и CSV-форматированием"
 )
 
 if __name__ == "__main__":