Update app.py
app.py
CHANGED
@@ -7,8 +7,7 @@ import logging
 from typing import Tuple, List, Optional
 import statistics
 import csv
-from
-import numpy as np
+from io import StringIO

 # Logging setup
 logging.basicConfig(level=logging.INFO)
@@ -35,7 +34,7 @@ def analyze_sentiment(text):
     """Extended sentiment analysis based on emoji and keywords"""
     positive_indicators = ['🔥', '❤️', '👍', '😊', '💪', '👏', '🎉', '♥️', '😍', '🙏',
                            'круто', 'супер', 'класс', 'огонь', 'пушка', 'отлично', 'здорово',
-                           'прекрасно', 'молодец', 'красота', 'спасибо', 'топ']
+                           'прекрасно', 'молодец', 'красота', 'спасибо', 'топ', 'лучший']
     negative_indicators = ['👎', '😢', '😞', '😠', '😡', '💔', '😕', '😑',
                            'плохо', 'ужас', 'отстой', 'фу', 'жесть', 'ужасно',
                            'разочарован', 'печаль', 'грустно']
@@ -48,38 +47,26 @@ def analyze_sentiment(text):
     positive_count += exclamation_count * 0.5 if positive_count > negative_count else 0
     negative_count += exclamation_count * 0.5 if negative_count > positive_count else 0

-
-    blob = TextBlob(text)
-    sentiment_score = blob.sentiment.polarity
-
-    # Combine both approaches
-    final_score = (positive_count - negative_count) + sentiment_score
-
-    if final_score > 0:
+    if positive_count > negative_count:
         return 'positive'
-    elif
+    elif negative_count > positive_count:
         return 'negative'
     return 'neutral'

 def extract_comment_data(comment_text):
-    """
-    Extracts data from a single comment
-    """
+    """Extracts data from a single comment"""
     try:
-        # Check for a hidden comment
         if 'Скрыто алгоритмами Instagram' in comment_text:
             username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
             if username_match:
                 return username_match.group(1).strip(), "", 0, 0
-
-        # Extract the username
+
         username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
         if not username_match:
             return None, None, 0, 0

         username = username_match.group(1).strip()

-        # Improved extraction of the comment text
         comment_pattern = fr"{re.escape(username)}\n(.*?)(?:\d+ нед\.)"
         comment_match = re.search(comment_pattern, comment_text, re.DOTALL)
         if comment_match:
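With TextBlob gone, the label now comes purely from indicator counting plus the exclamation-mark weighting shown above. A minimal standalone sketch of that counting rule (abbreviated indicator lists, illustrative function name, exclamation weighting omitted; the substring tally below is an assumption, not app.py's exact code):

```python
# Minimal sketch of the counting-only sentiment rule used after this change.
POSITIVE = ['🔥', '❤️', 'круто', 'супер', 'отлично', 'топ', 'лучший']  # abbreviated
NEGATIVE = ['👎', '💔', 'плохо', 'ужас', 'отстой']                      # abbreviated

def classify(text: str) -> str:
    text = text.lower()
    positive_count = sum(text.count(tok) for tok in POSITIVE)
    negative_count = sum(text.count(tok) for tok in NEGATIVE)
    if positive_count > negative_count:
        return 'positive'
    elif negative_count > positive_count:
        return 'negative'
    return 'neutral'

print(classify('Это просто 🔥, круто!'))  # positive
print(classify('Ужас, очень плохо 👎'))   # negative
print(classify('Ну, посмотрим'))          # neutral
```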
@@ -89,16 +76,13 @@ def extract_comment_data(comment_text):
         else:
             comment = ""

-        # Extract the number of weeks
         week_match = re.search(r'(\d+) нед\.', comment_text)
         weeks = int(week_match.group(1)) if week_match else 0

-        # Extract likes with improved matching
         likes = 0
         likes_patterns = [
             r"(\d+) отметк[аи] \"Нравится\"",
             r"Нравится: (\d+)",
-            r"\"Нравится\": (\d+)",
         ]

         for pattern in likes_patterns:
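The diff never shows a raw comment block, but the regexes above imply its shape. A small sketch, assuming a block of the form «Фото профиля <user> / comment text / N нед. N отметки "Нравится"»; the sample text is invented for illustration:

```python
import re

# Hypothetical raw block; the real format is whatever the Instagram page dump contains.
block = (
    'Фото профиля ivan_travel\n'
    'Очень круто 🔥\n'
    '3 нед. 2 отметки "Нравится" Ответить'
)

username = re.search(r"Фото профиля ([^\n]+)", block).group(1).strip()
weeks = int(re.search(r'(\d+) нед\.', block).group(1))

likes = 0
for pattern in [r'(\d+) отметк[аи] "Нравится"', r'Нравится: (\d+)']:
    match = re.search(pattern, block)
    if match:
        likes = int(match.group(1))
        break

comment_match = re.search(fr"{re.escape(username)}\n(.*?)(?:\d+ нед\.)", block, re.DOTALL)
comment = comment_match.group(1).strip() if comment_match else ""

print(username, weeks, likes)  # ivan_travel 3 2
print(comment)                 # Очень круто 🔥
```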
@@ -118,13 +102,14 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
         comments_blocks = re.split(r'(?=Фото профиля|Скрыто алгоритмами Instagram)', all_comments)
         comments_blocks = [block for block in comments_blocks if block.strip()]

-        #
+        # Count hidden comments
+        hidden_comments = len(re.findall(r'Скрыто алгоритмами Instagram', all_comments))
+
         usernames = []
         comments = []
         likes = []
         weeks = []

-        # Additional metrics
         total_emojis = 0
         mentions = []
         sentiments = []
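A quick sketch of what the lookahead split and the new hidden_comments counter produce for a combined dump; only the two marker strings come from the code, the sample transcript is invented:

```python
import re

all_comments = (
    'Фото профиля user_one\nКласс!\n2 нед.\n'
    'Скрыто алгоритмами Instagram\n'
    'Фото профиля user_two\nСупер 🔥\n1 нед.\n'
)

# The pattern is a zero-width lookahead, so the markers stay inside the resulting blocks.
blocks = re.split(r'(?=Фото профиля|Скрыто алгоритмами Instagram)', all_comments)
blocks = [b for b in blocks if b.strip()]
hidden_comments = len(re.findall(r'Скрыто алгоритмами Instagram', all_comments))

print(len(blocks), hidden_comments)  # 3 1
```

With the `continue` added in the next hunk, the «Скрыто алгоритмами Instagram» block is counted here but skipped during per-comment analysis.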
@@ -132,11 +117,12 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
         words_per_comment = []
         all_words = []
         user_engagement = {}
-        reply_chains = []
-        current_chain = []

-        # Processing
+        # Process the comments
         for block in comments_blocks:
+            if 'Скрыто алгоритмами Instagram' in block:
+                continue
+
             username, comment, like_count, week_number = extract_comment_data(block)
             if username and (comment is not None):
                 usernames.append(username)
@@ -144,28 +130,16 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
                 likes.append(str(like_count))
                 weeks.append(week_number)

-                # Basic metrics
                 total_emojis += count_emojis(comment)
-
-                mentions.extend(comment_mentions)
+                mentions.extend(extract_mentions(comment))
                 sentiment = analyze_sentiment(comment)
                 sentiments.append(sentiment)
                 comment_lengths.append(len(comment))

-                # Reply-chain analysis
-                if comment_mentions:
-                    current_chain.append((username, comment_mentions[0]))
-                else:
-                    if current_chain:
-                        reply_chains.append(current_chain)
-                        current_chain = []
-
-                # Extended metrics
                 words = get_comment_words(comment)
                 words_per_comment.append(len(words))
                 all_words.extend(words)

-                # Per-user statistics
                 if username not in user_engagement:
                     user_engagement[username] = {
                         'comments': 0,
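extract_mentions is defined elsewhere in app.py and its body is not part of this diff; the stand-in below is a hypothetical sketch (assumed @-handle regex) only to make the mentions flow above concrete:

```python
import re

def extract_mentions_sketch(comment: str) -> list:
    """Hypothetical stand-in for app.py's extract_mentions: pull @handles from a comment."""
    return re.findall(r'@([A-Za-z0-9_.]+)', comment)

mentions = []
for comment in ['Согласен с @ivan_travel 🔥', 'Просто супер', '@anna_k @ivan_travel гляньте']:
    mentions.extend(extract_mentions_sketch(comment))

print(mentions)  # ['ivan_travel', 'anna_k', 'ivan_travel']
```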
@@ -173,9 +147,7 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
                         'emoji_usage': 0,
                         'avg_length': 0,
                         'sentiments': [],
-                        '
-                        'mentions_made': len(comment_mentions),
-                        'response_time': []
+                        'weeks': []  # added for temporal activity analysis
                     }
                 user_stats = user_engagement[username]
                 user_stats['comments'] += 1
@@ -183,74 +155,141 @@ def analyze_post(content_type, link_to_post, post_likes, post_date, description,
                 user_stats['emoji_usage'] += count_emojis(comment)
                 user_stats['avg_length'] += len(comment)
                 user_stats['sentiments'].append(sentiment)
+                user_stats['weeks'].append(week_number)

-        #
-
-
-
-
+        # Check the comment count
+        total_comments = len(comments)
+        if total_comments != comment_count:
+            logger.warning(f"Found {total_comments} comments, but expected {comment_count}")
+
+        # Update per-user statistics
         for username in user_engagement:
             stats = user_engagement[username]
             stats['avg_length'] /= stats['comments']
             stats['engagement_rate'] = stats['total_likes'] / stats['comments']
             stats['sentiment_ratio'] = sum(1 for s in stats['sentiments'] if s == 'positive') / len(stats['sentiments'])
-            stats['
+            stats['activity_period'] = max(stats['weeks']) - min(stats['weeks']) if stats['weeks'] else 0
+
+        # Compute basic statistics
+        avg_comment_length = sum(comment_lengths) / total_comments
+        sentiment_distribution = Counter(sentiments)
+        most_active_users = Counter(usernames).most_common(5)
+        most_mentioned = Counter(mentions).most_common(5)
+        avg_likes = sum(map(int, likes)) / len(likes) if likes else 0
+        earliest_week = max(weeks) if weeks else 0
+        latest_week = min(weeks) if weeks else 0
+
+        # Extended statistics
+        median_comment_length = statistics.median(comment_lengths)
+        avg_words_per_comment = sum(words_per_comment) / total_comments
+        common_words = Counter(all_words).most_common(10)

         # Experimental analytics
-
-        '
-        '
-        '
-
-
+        engagement_periods = {
+            'early': [],
+            'middle': [],
+            'late': []
+        }
+        week_range = max(weeks) - min(weeks) if weeks else 0
+        period_length = week_range / 3 if week_range > 0 else 1
+
+        for i, week in enumerate(weeks):
+            if week >= max(weeks) - period_length:
+                engagement_periods['early'].append(i)
+            elif week >= max(weeks) - 2 * period_length:
+                engagement_periods['middle'].append(i)
+            else:
+                engagement_periods['late'].append(i)
+
+        period_stats = {
+            period: {
+                'comments': len(indices),
+                'avg_likes': sum(int(likes[i]) for i in indices) / len(indices) if indices else 0,
+                'sentiment_ratio': sum(1 for i in indices if sentiments[i] == 'positive') / len(indices) if indices else 0
+            }
+            for period, indices in engagement_periods.items()
         }

-        #
         csv_data = {
-            '
-
-
-
-
-
-
-
-
-            '
-
-
+            'metadata': {
+                'content_type': content_type,
+                'link': link_to_post,
+                'post_likes': post_likes,
+                'post_date': post_date,
+                'total_comments': total_comments,
+                'expected_comments': comment_count,
+                'hidden_comments': hidden_comments
+            },
+            'basic_stats': {
+                'avg_comment_length': avg_comment_length,
+                'median_comment_length': median_comment_length,
+                'avg_words': avg_words_per_comment,
+                'total_emojis': total_emojis,
+                'avg_likes': avg_likes
+            },
+            'sentiment_stats': {
+                'positive': sentiment_distribution['positive'],
+                'neutral': sentiment_distribution['neutral'],
+                'negative': sentiment_distribution['negative']
+            },
+            'period_analysis': period_stats,
+            'top_users': dict(most_active_users),
+            'top_mentioned': dict(most_mentioned)
         }

-        #
-
+        # Build the CSV string
+        output = StringIO()
+        writer = csv.writer(output)
+        for section, data in csv_data.items():
+            writer.writerow([section])
+            for key, value in data.items():
+                writer.writerow([key, value])
+            writer.writerow([])
+        csv_output = output.getvalue()

-        #
+        # Build the text report
         analytics_summary = (
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
-            f"
+            f"CSV DATA:\n{csv_output}\n\n"
+            f"ДЕТАЛЬНЫЙ АНАЛИЗ:\n"
+            f"Контент: {content_type}\n"
+            f"Ссылка: {link_to_post}\n\n"
+            f"СТАТИСТИКА:\n"
+            f"- Всего комментариев: {total_comments} (ожидалось: {comment_count})\n"
+            f"- Скрытых комментариев: {hidden_comments}\n"
+            f"- Всего лайков: {sum(map(int, likes))}\n"
+            f"- Среднее лайков: {avg_likes:.1f}\n"
+            f"- Период: {earliest_week}-{latest_week} недель\n\n"
+            f"АНАЛИЗ КОНТЕНТА:\n"
+            f"- Средняя длина: {avg_comment_length:.1f} символов\n"
+            f"- Медиана длины: {median_comment_length} символов\n"
+            f"- Среднее слов: {avg_words_per_comment:.1f}\n"
+            f"- Эмодзи: {total_emojis}\n"
+            f"- Тональность:\n"
+            f"  * Позитив: {sentiment_distribution['positive']}\n"
+            f"  * Нейтрально: {sentiment_distribution['neutral']}\n"
+            f"  * Негатив: {sentiment_distribution['negative']}\n\n"
+            f"ПОПУЛЯРНЫЕ СЛОВА:\n"
+            + "\n".join([f"- {word}: {count}" for word, count in common_words]) + "\n\n"
+            f"АКТИВНЫЕ ПОЛЬЗОВАТЕЛИ:\n"
+            + "\n".join([f"- {user}: {count}" for user, count in most_active_users]) + "\n\n"
+            f"УПОМИНАНИЯ:\n"
+            + "\n".join([f"- {user}: {count}" for user, count in most_mentioned if user]) + "\n\n"
+            f"АНАЛИЗ ПО ПЕРИОДАМ:\n"
+            + "\n".join([f"- {period}: {stats['comments']} комментариев, {stats['avg_likes']:.1f} лайков/коммент, "
+                         f"{stats['sentiment_ratio']*100:.1f}% позитивных"
+                         for period, stats in period_stats.items()]) + "\n\n"
+            f"ЭКСПЕРИМЕНТАЛЬНАЯ АНАЛИТИКА:\n"
+            f"- Самый активный период: {max(period_stats.items(), key=lambda x: x[1]['comments'])[0]}\n"
+            f"- Наиболее позитивный период: {max(period_stats.items(), key=lambda x: x[1]['sentiment_ratio'])[0]}\n"
+            f"- Период с макс. вовлеченностью: {max(period_stats.items(), key=lambda x: x[1]['avg_likes'])[0]}"
         )

-        return analytics_summary,
+        return analytics_summary, "\n".join(usernames), "\n".join(comments), "\n".join(likes), str(sum(map(int, likes)))

     except Exception as e:
         logger.error(f"Error in analyze_post: {e}", exc_info=True)
-        return str(e), "", "", "", "0"
+        return f"Error: {str(e)}", "", "", "", "0"

 # Create the Gradio interface
 iface = gr.Interface(
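The early/middle/late bucketing above operates on "weeks ago" values, so larger numbers are older comments. The same index-bucketing logic, lifted into a self-contained snippet with a made-up weeks list:

```python
# Same bucketing rule as the added code; 'weeks' are "weeks ago",
# so the largest values are the earliest comments.
weeks = [12, 11, 9, 6, 5, 2, 1]

week_range = max(weeks) - min(weeks) if weeks else 0      # 11
period_length = week_range / 3 if week_range > 0 else 1   # ~3.67

engagement_periods = {'early': [], 'middle': [], 'late': []}
for i, week in enumerate(weeks):
    if week >= max(weeks) - period_length:
        engagement_periods['early'].append(i)
    elif week >= max(weeks) - 2 * period_length:
        engagement_periods['middle'].append(i)
    else:
        engagement_periods['late'].append(i)

print(engagement_periods)
# {'early': [0, 1, 2], 'middle': [3, 4], 'late': [5, 6]}
```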
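For reference, the nested writer loop emits one section-name row, then the section's key/value rows, then a blank separator row. A compact sketch with placeholder values (not real results) showing the csv_output layout:

```python
import csv
from io import StringIO

# Placeholder stand-in for the csv_data dict built in the diff; values are dummies.
csv_data = {
    'metadata': {'content_type': 'reel', 'total_comments': 42, 'hidden_comments': 3},
    'basic_stats': {'avg_comment_length': 37.5, 'avg_likes': 4.2},
}

output = StringIO()
writer = csv.writer(output)
for section, data in csv_data.items():
    writer.writerow([section])          # section header row
    for key, value in data.items():
        writer.writerow([key, value])   # one key,value row per metric
    writer.writerow([])                 # blank separator row

print(output.getvalue())
# metadata
# content_type,reel
# total_comments,42
# hidden_comments,3
# (blank line)
# basic_stats
# avg_comment_length,37.5
# avg_likes,4.2
# (blank line)
# Rows are CRLF-terminated by csv's default dialect.
```

Note that sections whose values are themselves dicts (period_analysis, top_users, top_mentioned) end up as a Python repr string in the value cell, which matters if the CSV is meant to be machine-read.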