boompack committed on
Commit
e5c8ff6
·
verified ·
1 Parent(s): c433a20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +236 -189
app.py CHANGED
@@ -3,203 +3,250 @@ import re
3
  from collections import Counter
4
  from datetime import datetime
5
  import emoji
 
 
 
6
 
7
def clean_text(text):
    """Collapse every run of whitespace (spaces, tabs, line breaks) into one space.

    Leading and trailing whitespace is dropped as a side effect of splitting.
    """
    words = text.split()
    return ' '.join(words)
10
 
11
def count_emojis(text):
    """Return the number of emoji contained in ``text``.

    Counting is delegated to ``emoji.emoji_count`` so that an emoji built from
    several code points is counted once as a whole, rather than testing each
    individual character against ``emoji.EMOJI_DATA``.
    """
    return emoji.emoji_count(text)
 
 
 
 
 
14
 
15
def extract_mentions(text):
    """Return every ``@name`` mention found in ``text``, in order of appearance.

    A mention is ``@`` followed by one or more word characters or dots.
    """
    mention_re = re.compile(r'@[\w\.]+')
    return [found.group(0) for found in mention_re.finditer(text)]
 
 
 
 
 
 
18
 
19
def analyze_sentiment(text):
    """Classify ``text`` as 'positive', 'negative' or 'neutral'.

    The lower-cased text is checked for a fixed set of positive and negative
    markers (emoji and Russian keywords). Each marker counts at most once,
    however often it occurs; the side with more distinct markers wins and a
    tie (including no markers at all) is 'neutral'.
    """
    positive_indicators = ['🔥', '❤️', '👍', 'круто', 'супер', 'класс', 'огонь', 'пушка']
    negative_indicators = ['👎', '😢', 'плохо', 'ужас']

    lowered = text.lower()
    balance = (
        sum(marker in lowered for marker in positive_indicators)
        - sum(marker in lowered for marker in negative_indicators)
    )

    if balance > 0:
        return 'positive'
    if balance < 0:
        return 'negative'
    return 'neutral'
33
-
34
def extract_comment_data(comment_text):
    """Extract the fields of one comment block.

    Returns ``(username, comment_text, likes_count, week_number)``. When the
    block has no "Фото профиля <name>" line the result is
    ``(None, None, 0, 0)``.
    """
    # The username is whatever follows "Фото профиля" on the same line.
    username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
    if not username_match:
        return None, None, 0, 0

    username = username_match.group(1).strip()
    # The username goes into regular expressions below; escape it so that
    # dots and other metacharacters in a name are matched literally instead
    # of changing the pattern or raising re.error.
    escaped_username = re.escape(username)

    # The comment body is the text between "<username>\n" and the
    # "<n> нед." age marker.
    comment_match = re.search(
        escaped_username + r"\n(.*?)(?:\d+ нед\.)", comment_text, re.DOTALL
    )
    if comment_match:
        comment = clean_text(comment_match.group(1))
        # Drop a repeated username at the start of the body ...
        comment = re.sub('^' + escaped_username + r'\s*', '', comment)
        # ... and a leading @mention.
        comment = re.sub(r'^@[\w\.]+ ', '', comment)
    else:
        comment = ""

    # Age of the comment in weeks (0 when no marker is present).
    week_match = re.search(r'(\d+) нед\.', comment_text)
    weeks = int(week_match.group(1)) if week_match else 0

    # Like count: the first pattern that matches anywhere in the block wins.
    likes = 0
    likes_patterns = [
        r"(\d+) отметк[аи] \"Нравится\"",
        r"Нравится: (\d+)",
    ]
    for pattern in likes_patterns:
        likes_match = re.search(pattern, comment_text)
        if likes_match:
            likes = int(likes_match.group(1))
            break

    return username, comment.strip(), likes, weeks
75
 
76
def analyze_post(content_type, link_to_post, post_likes, post_date, description, comment_count, all_comments):
    """Analyze pasted comment text and build the five text outputs of the UI.

    ``post_likes``, ``post_date``, ``description`` and ``comment_count`` are
    accepted but not used by the analysis.

    Returns a 5-tuple of strings: the analytics summary, the usernames (one
    per line), the comment texts (one per line), the per-comment like counts
    (one per line) and the total number of likes on comments. If an exception
    is raised, the first four elements carry the error message and the last
    one is "0".
    """
    try:
        # Every comment starts with "Фото профиля"; split right before it.
        comments_blocks = [
            block
            for block in re.split(r'(?=Фото профиля)', all_comments)
            if block.strip()
        ]

        usernames = []
        comments = []
        likes = []
        weeks = []
        mentions = []
        sentiments = []
        emoji_counts = []

        for block in comments_blocks:
            username, comment, like_count, week_number = extract_comment_data(block)
            if not (username and comment):
                continue
            usernames.append(username)
            comments.append(comment)
            likes.append(like_count)
            weeks.append(week_number)
            emoji_counts.append(count_emojis(comment))
            mentions.extend(extract_mentions(comment))
            sentiments.append(analyze_sentiment(comment))

        total_comments = len(comments)
        total_emojis = sum(emoji_counts)
        total_likes_sum = sum(likes)

        def per_comment(total):
            # With no parsed comments every average/percentage is reported
            # as 0 instead of dividing by zero.
            return total / total_comments if total_comments else 0

        avg_comment_length = per_comment(sum(len(comment) for comment in comments))
        avg_likes = per_comment(total_likes_sum)
        liked_percent = per_comment(sum(1 for count in likes if count > 0)) * 100
        emoji_percent = per_comment(sum(1 for count in emoji_counts if count > 0)) * 100

        sentiment_distribution = Counter(sentiments)
        most_active_users = Counter(usernames).most_common(5)
        most_mentioned = Counter(mentions).most_common(5)
        # A larger week number means an older comment.
        earliest_week = max(weeks) if weeks else 0
        latest_week = min(weeks) if weeks else 0

        analytics_summary = (
            f"Content Type: {content_type}\n"
            f"Link to Post: {link_to_post}\n\n"
            f"ОСНОВНАЯ СТАТИСТИКА:\n"
            f"- Всего комментариев: {total_comments}\n"
            f"- Всего лайков на комментариях: {total_likes_sum}\n"
            f"- Среднее количество лайков: {avg_likes:.1f}\n"
            f"- Период активности: {earliest_week}-{latest_week} недель\n\n"
            f"АНАЛИЗ КОНТЕНТА:\n"
            f"- Средняя длина комментария: {avg_comment_length:.1f} символов\n"
            f"- Всего эмодзи использовано: {total_emojis}\n"
            f"- Тональность комментариев:\n"
            f" * Позитивных: {sentiment_distribution['positive']}\n"
            f" * Нейтральных: {sentiment_distribution['neutral']}\n"
            f" * Негативных: {sentiment_distribution['negative']}\n\n"
            f"АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:\n"
            f"Самые активные комментаторы:\n"
            + "\n".join([f"- {user}: {count} комментариев" for user, count in most_active_users]) + "\n\n"
            f"Самые упоминаемые пользователи:\n"
            + "\n".join([f"- {user}: {count} упоминаний" for user, count in most_mentioned if user]) + "\n\n"
            f"ВОВЛЕЧЕННОСТЬ:\n"
            f"- Процент комментариев с лайками: {liked_percent:.1f}%\n"
            f"- Процент комментариев с эмодзи: {emoji_percent:.1f}%\n"
        )

        return (
            analytics_summary,
            "\n".join(usernames),
            "\n".join(comments),
            "\n".join(map(str, likes)),
            str(total_likes_sum),
        )

    except Exception as e:
        error_message = f"Произошла ошибка при обработке: {str(e)}\n{str(type(e))}"
        return error_message, error_message, error_message, error_message, "0"
156
-
157
# Build the Gradio interface: seven form inputs are passed to analyze_post
# and its five returned strings are shown in five text boxes.
iface = gr.Interface(
    fn=analyze_post,
    inputs=[
        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
        gr.Textbox(label="Link to Post", placeholder="Введите ссылку на пост"),
        gr.Number(label="Likes", value=0),
        gr.Textbox(label="Post Date", placeholder="Введите дату публикации"),
        gr.Textbox(label="Description", placeholder="Введите описание поста", lines=3),
        gr.Number(label="Total Comment Count", value=0),
        gr.Textbox(label="All Comments", placeholder="Вставьте комментарии", lines=10),
    ],
    outputs=[
        gr.Textbox(label="Analytics Summary", lines=20),
        gr.Textbox(label="Usernames (Output 1)", lines=5),
        gr.Textbox(label="Comments (Output 2)", lines=5),
        gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
        gr.Textbox(label="Total Likes on Comments (Output 4)"),
    ],
    title="Instagram Comment Analyzer Pro",
    description="Расширенный анализатор комментариев Instagram с детальной аналитикой",
)

if __name__ == "__main__":
    iface.launch()
 
3
  from collections import Counter
4
  from datetime import datetime
5
  import emoji
6
+ from transformers import pipeline
7
+ import logging
8
+ from typing import Tuple, List, Optional
9
 
10
# Configure the root logger at INFO level on import and expose a
# module-level logger for the rest of this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
13
 
14
class CommentAnalyzer:
    """Parse pasted Instagram comment text and derive engagement analytics.

    The input is expected to consist of comment blocks that each start with
    the marker "Фото профиля" followed by the username, the comment text and
    an age marker in weeks ("3 нед." / "3w").
    """

    # Patterns are compiled once, when the class is defined. They remain
    # reachable as ``self.mention_pattern`` / ``self.comment_pattern``.
    mention_pattern = re.compile(r'@[\w\.]+')
    comment_pattern = re.compile(
        r'Фото профиля\s+(.+?)\s+'                # 1: username
        r'((?:(?!Фото профиля).)+?)\s+'           # 2: comment text
        # 3: like count, only when followed by a "likes" keyword so that a
        # number ending the comment text is not mistaken for it.
        r'(?:(\d+)\s*(?i:нравится|likes?)\s*)?'
        # 4: age in weeks; the word boundary keeps "5 новых" / "5 wins" from
        # being read as an age marker.
        r'(\d+)\s*(?:нед\.?|н\b|w\b)',
        re.DOTALL,
    )
    # Like-count notations searched AFTER the age marker. The two Russian
    # forms are the ones the previous parser of this file looked for
    # (extended to also accept "отметок"); the English form is an assumption.
    _trailing_likes_patterns = (
        re.compile(r'(\d+)\s+отмет(?:ка|ки|ок)\s+["«]?Нравится'),
        re.compile(r'Нравится["»]?\s*:\s*(\d+)'),
        re.compile(r'(\d+)\s+likes?', re.IGNORECASE),
    )

    def __init__(self, model_name: Optional[str] = None):
        """Load the sentiment-analysis pipeline.

        Args:
            model_name: Optional model identifier forwarded to
                ``transformers.pipeline``. ``None`` keeps the pipeline's
                default model. NOTE(review): the form strings in this file
                are Russian; confirm the default model suits the comment
                language.

        Raises:
            Exception: whatever ``pipeline`` raises; it is logged and
                re-raised unchanged.
        """
        try:
            self.sentiment_model = pipeline("sentiment-analysis", model=model_name)
        except Exception:
            logger.exception("Failed to load sentiment model")
            raise

    def clean_text(self, text: str) -> str:
        """Collapse all runs of whitespace (including line breaks) into single spaces."""
        return ' '.join(text.split())

    def count_emojis(self, text: str) -> int:
        """Return the number of emoji in ``text``.

        ``emoji.emoji_count`` counts an emoji made of several code points
        once, which a per-character lookup in ``EMOJI_DATA`` does not.
        """
        return emoji.emoji_count(text)

    def extract_mentions(self, text: str) -> List[str]:
        """Return all ``@name`` mentions in ``text``, in order of appearance."""
        return self.mention_pattern.findall(text)

    def analyze_sentiment(self, text: str) -> str:
        """Classify ``text`` as 'positive', 'negative' or 'neutral'.

        Any label other than POSITIVE/NEGATIVE (compared case-insensitively)
        and any failure of the model call yields 'neutral'.
        """
        try:
            # The slice bounds the input by characters; ``truncation=True``
            # additionally asks the tokenizer to cut the input to the model's
            # maximum length, which is measured in tokens.
            result = self.sentiment_model(text[:512], truncation=True)
            label = str(result[0]['label']).upper()
        except Exception as e:
            logger.warning("Sentiment analysis failed: %s", e)
            return 'neutral'

        if label == 'POSITIVE':
            return 'positive'
        if label == 'NEGATIVE':
            return 'negative'
        return 'neutral'

    def _find_trailing_likes(self, tail: str) -> int:
        """Return the first like count found in ``tail``, or 0 if none is found."""
        for pattern in self._trailing_likes_patterns:
            found = pattern.search(tail)
            if found:
                return int(found.group(1))
        return 0

    def extract_comment_data(self, comment_block: str) -> Tuple[Optional[str], Optional[str], int, int]:
        """Extract structured data from one comment block.

        Returns:
            ``(username, comment_text, likes_count, week_number)``, or
            ``(None, None, 0, 0)`` when the block does not match
            ``comment_pattern``.
        """
        match = self.comment_pattern.search(comment_block)
        if not match:
            return None, None, 0, 0

        username, comment, likes, week = match.groups()
        username = username.strip()
        comment = self.clean_text(comment)

        # The username may be repeated as the first token of the body;
        # keep only the actual comment text.
        if comment == username:
            comment = ''
        elif comment.startswith(username + ' '):
            comment = comment[len(username) + 1:]

        if likes is None:
            # No like count before the age marker: look after it, but never
            # beyond the start of the next comment block.
            tail = comment_block[match.end():].split('Фото профиля', 1)[0]
            like_count = self._find_trailing_likes(tail)
        else:
            like_count = int(likes)

        return username, comment, like_count, int(week)

    def analyze_post(self,
                     content_type: str,
                     link_to_post: str,
                     post_likes: int,
                     post_date: str,
                     description: str,
                     comment_count: int,
                     all_comments: str) -> Tuple[str, str, str, str, str]:
        """Analyze pasted comments and generate the analytics outputs.

        Args:
            content_type: Type of content ("Photo" or "Video").
            link_to_post: URL of the post.
            post_likes: Number of likes on the post (not used here).
            post_date: Date of post publication (not used here).
            description: Post description/caption (not used here).
            comment_count: Total number of comments (not used here).
            all_comments: Raw text containing all comments; ``None`` is
                treated like an empty string.

        Returns:
            Tuple of five strings: analytics summary, usernames (one per
            line), comments (one per line), like counts (one per line) and
            the total like count. If no comment can be parsed, or an
            exception occurs, the first element is an
            "Error during analysis: ..." message, the next three are empty
            and the last one is "0".
        """
        try:
            # Every comment starts with the marker; split right before it.
            comments_blocks = [
                block
                for block in re.split(r'(?=Фото профиля)', all_comments or '')
                if block.strip()
            ]

            data = {
                'usernames': [],
                'comments': [],
                'likes': [],
                'weeks': [],
                'emojis': 0,
                'mentions': [],
                'sentiments': [],
                'lengths': []
            }

            for block in comments_blocks:
                username, comment, like_count, week = self.extract_comment_data(block)
                if not (username and comment):
                    continue
                data['usernames'].append(username)
                data['comments'].append(comment)
                data['likes'].append(like_count)
                data['weeks'].append(week)

                data['emojis'] += self.count_emojis(comment)
                data['mentions'].extend(self.extract_mentions(comment))
                data['sentiments'].append(self.analyze_sentiment(comment))
                data['lengths'].append(len(comment))

            total_comments = len(data['comments'])
            if total_comments == 0:
                # Empty or unparseable input is an expected situation: report
                # it directly rather than raising and logging a traceback.
                return ("Error during analysis: No valid comments found in input", "", "", "", "0")

            analytics = {
                'avg_length': sum(data['lengths']) / total_comments,
                'sentiment_dist': Counter(data['sentiments']),
                'active_users': Counter(data['usernames']).most_common(5),
                'top_mentions': Counter(data['mentions']).most_common(5),
                'avg_likes': sum(data['likes']) / total_comments,
                'weeks_range': (min(data['weeks']), max(data['weeks'])),
                'total_likes': sum(data['likes'])
            }

            summary = self._format_analytics_summary(
                content_type, link_to_post, data, analytics, total_comments
            )

            return (
                summary,
                '\n'.join(data['usernames']),
                '\n'.join(data['comments']),
                '\n'.join(map(str, data['likes'])),
                str(analytics['total_likes'])
            )

        except Exception as e:
            logger.error("Error analyzing post: %s", e, exc_info=True)
            return (f"Error during analysis: {str(e)}", "", "", "", "0")

    def _format_analytics_summary(self, content_type, link, data, analytics, total_comments):
        """Format the collected data and analytics into a readable summary.

        The summary is assembled from a list of lines joined with newlines,
        so the result contains exactly the text listed below and nothing
        that depends on how this source file is indented.
        """
        def percent(count):
            # Guard against a caller passing total_comments == 0.
            return count / total_comments * 100 if total_comments else 0.0

        liked = sum(1 for count in data['likes'] if count > 0)
        with_emoji = sum(1 for comment in data['comments'] if self.count_emojis(comment) > 0)
        sentiment = analytics['sentiment_dist']
        first_week, last_week = analytics['weeks_range']

        lines = [
            f"Content Type: {content_type}",
            f"Link to Post: {link}",
            "",
            "ОСНОВНАЯ СТАТИСТИКА:",
            f"- Всего комментариев: {total_comments}",
            f"- Всего лайков на комментариях: {analytics['total_likes']}",
            f"- Среднее количество лайков: {analytics['avg_likes']:.1f}",
            f"- Период активности: {first_week}-{last_week} недель",
            "",
            "АНАЛИЗ КОНТЕНТА:",
            f"- Средняя длина комментария: {analytics['avg_length']:.1f} символов",
            f"- Всего эмодзи использовано: {data['emojis']}",
            "- Тональность комментариев:",
            f"  * Позитивных: {sentiment['positive']}",
            f"  * Нейтральных: {sentiment['neutral']}",
            f"  * Негативных: {sentiment['negative']}",
            "",
            "АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:",
            "Самые активные комментаторы:",
        ]
        lines.extend(
            f"- {user}: {count} комментариев"
            for user, count in analytics['active_users']
        )
        lines.append("")
        lines.append("Самые упоминаемые пользователи:")
        lines.extend(
            f"- {user}: {count} упоминаний"
            for user, count in analytics['top_mentions']
            if user
        )
        lines.extend([
            "",
            "ВОВЛЕЧЕННОСТЬ:",
            f"- Процент комментариев с лайками: {percent(liked):.1f}%",
            f"- Процент комментариев с эмодзи: {percent(with_emoji):.1f}%",
        ])
        return "\n".join(lines)
198
+
199
def create_interface():
    """Build the Gradio interface around a newly created ``CommentAnalyzer``.

    Seven form inputs are passed to ``CommentAnalyzer.analyze_post`` and the
    five strings it returns are displayed in five text boxes.
    """
    analyzer = CommentAnalyzer()

    input_components = [
        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
        gr.Textbox(label="Link to Post", placeholder="Введите ссылку на пост"),
        gr.Number(label="Likes", value=0),
        gr.Textbox(label="Post Date", placeholder="Введите дату публикации"),
        gr.Textbox(label="Description", placeholder="Введите описание поста", lines=3),
        gr.Number(label="Total Comment Count", value=0),
        gr.Textbox(label="All Comments", placeholder="Вставьте комментарии", lines=10),
    ]
    output_components = [
        gr.Textbox(label="Analytics Summary", lines=20),
        gr.Textbox(label="Usernames (Output 1)", lines=5),
        gr.Textbox(label="Comments (Output 2)", lines=5),
        gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
        gr.Textbox(label="Total Likes on Comments (Output 4)"),
    ]

    return gr.Interface(
        fn=analyzer.analyze_post,
        inputs=input_components,
        outputs=output_components,
        title="Instagram Comment Analyzer Pro",
        description="Расширенный анализатор комментариев Instagram с детальной аналитикой",
    )


if __name__ == "__main__":
    iface = create_interface()
    iface.launch()