File size: 9,976 Bytes
b4bbaee cbff93c e5c8ff6 cbff93c e5c8ff6 cbff93c e5c8ff6 cbff93c e5c8ff6 cbff93c e5c8ff6 b4bbaee e5c8ff6 cbff93c e5c8ff6 cbff93c e5c8ff6 cbff93c e5c8ff6 b4bbaee cbff93c e5c8ff6 cbff93c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 |
import gradio as gr
import re
from collections import Counter
from datetime import datetime
import emoji
from transformers import pipeline
import logging
from typing import Tuple, List, Optional
# Configure logging at INFO level via basicConfig and create this module's
# named logger, which the rest of the file uses for warnings and errors.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class CommentAnalyzer:
    """Parse pasted Instagram comment text and compute summary analytics.

    Each comment is expected to start with the marker ``Фото профиля``
    followed by the username, the comment text, an optional like count and
    a week number suffixed with ``н`` or ``w``.
    """

    # Patterns are compiled once at class level (not per instance) so they are
    # available without loading the sentiment model; they remain reachable as
    # ``self.mention_pattern`` / ``self.comment_pattern``.
    mention_pattern = re.compile(r'@[\w\.]+')
    comment_pattern = re.compile(
        r'Фото профиля\s+(.+?)\s+'  # Username
        r'((?:(?!Фото профиля).)+?)\s+'  # Comment text
        # Likes count. The (?!\d) guard stops the optional group from taking
        # only the leading digits of a multi-digit week number
        # (e.g. "12 н" must not be read as likes=1, week=2).
        r'(?:(\d+)(?!\d))?\s*(?:нравится|like[s]?)?\s*'
        r'(\d+)\s*(?:н|w)',  # Week number
        re.DOTALL,
    )
    # Zero-width split point in front of every comment marker.
    _block_split_pattern = re.compile(r'(?=Фото профиля)')

    # Maps the model labels handled here to the labels used in the report.
    _SENTIMENT_LABELS = {'POSITIVE': 'positive', 'NEGATIVE': 'negative'}

    def __init__(self):
        """Load the sentiment-analysis pipeline.

        Raises:
            Exception: whatever ``pipeline`` raised; it is logged and re-raised.
        """
        try:
            self.sentiment_model = pipeline("sentiment-analysis")
        except Exception as e:
            logger.error("Failed to load sentiment model: %s", e)
            raise

    def clean_text(self, text: str) -> str:
        """Collapse every run of whitespace (including newlines) to one space."""
        return ' '.join(text.split())

    def count_emojis(self, text: str) -> int:
        """Count the characters of *text* that are keys of ``emoji.EMOJI_DATA``."""
        return sum(1 for c in text if c in emoji.EMOJI_DATA)

    def extract_mentions(self, text: str) -> List[str]:
        """Return all ``@mention`` tokens found in *text*, in order."""
        return self.mention_pattern.findall(text)

    def analyze_sentiment(self, text: str) -> str:
        """Classify *text* as ``'positive'``, ``'negative'`` or ``'neutral'``.

        Only the first 512 characters are passed to the model. Any label other
        than POSITIVE/NEGATIVE, and any model failure, yields ``'neutral'``.
        """
        try:
            result = self.sentiment_model(text[:512])  # Limit text length for model
            return self._SENTIMENT_LABELS.get(result[0]['label'], 'neutral')
        except Exception as e:
            # Best effort: one failing comment must not abort the whole analysis.
            logger.warning("Sentiment analysis failed: %s", e)
            return 'neutral'

    def extract_comment_data(self, comment_block: str) -> Tuple[Optional[str], Optional[str], int, int]:
        """Extract structured data from a single comment block.

        Returns:
            ``(username, comment_text, likes_count, week_number)``.
            If the block does not match the expected layout the result is
            ``(None, None, 0, 0)``. A missing like count is reported as 0.
        """
        match = self.comment_pattern.search(comment_block)
        if not match:
            return None, None, 0, 0
        username, comment, likes, week = match.groups()
        return (
            username.strip(),
            self.clean_text(comment),
            int(likes or 0),
            int(week or 0),
        )

    def analyze_post(self,
                     content_type: str,
                     link_to_post: str,
                     post_likes: int,
                     post_date: str,
                     description: str,
                     comment_count: int,
                     all_comments: str) -> Tuple[str, str, str, str, str]:
        """Analyze pasted post comments and build the report fields.

        Args:
            content_type: Type of content ("Photo" or "Video"); echoed in the summary.
            link_to_post: URL of the post; echoed in the summary.
            post_likes: Accepted for the form; not used by the analysis.
            post_date: Accepted for the form; not used by the analysis.
            description: Accepted for the form; not used by the analysis.
            comment_count: Accepted for the form; not used by the analysis.
            all_comments: Raw text containing all comments.

        Returns:
            Tuple of five strings:
            - analytics summary
            - usernames, one per line
            - comments, one per line
            - per-comment like counts, one per line, in input order
            - total likes over all parsed comments

            On any error (including no parsable comments) the first element is
            an error message, the next three are empty and the last is ``"0"``.
        """
        try:
            # Split comments into blocks; ``or ""`` makes a missing value
            # report "no valid comments" instead of a raw type error.
            comments_blocks = [
                block
                for block in self._block_split_pattern.split(all_comments or "")
                if block.strip()
            ]

            data = {
                'usernames': [],
                'comments': [],
                'likes': [],
                'weeks': [],
                'emojis': 0,
                'mentions': [],
                'sentiments': [],
                'lengths': [],
            }

            for block in comments_blocks:
                username, comment, like_count, week = self.extract_comment_data(block)
                # Skip blocks that did not parse into a username and a text.
                if not (username and comment):
                    continue
                data['usernames'].append(username)
                data['comments'].append(comment)
                data['likes'].append(like_count)
                data['weeks'].append(week)
                # Collect metrics
                data['emojis'] += self.count_emojis(comment)
                data['mentions'].extend(self.extract_mentions(comment))
                data['sentiments'].append(self.analyze_sentiment(comment))
                data['lengths'].append(len(comment))

            total_comments = len(data['comments'])
            if total_comments == 0:
                raise ValueError("No valid comments found in input")

            total_likes = sum(data['likes'])
            analytics = {
                'avg_length': sum(data['lengths']) / total_comments,
                'sentiment_dist': Counter(data['sentiments']),
                'active_users': Counter(data['usernames']).most_common(5),
                'top_mentions': Counter(data['mentions']).most_common(5),
                'avg_likes': total_likes / total_comments,
                'weeks_range': (min(data['weeks']), max(data['weeks'])),
                'total_likes': total_likes,
            }

            summary = self._format_analytics_summary(
                content_type, link_to_post, data, analytics, total_comments
            )
            return (
                summary,
                '\n'.join(data['usernames']),
                '\n'.join(data['comments']),
                '\n'.join(map(str, data['likes'])),
                str(analytics['total_likes']),
            )
        except Exception as e:
            # UI boundary: report the failure in the output fields, never raise.
            logger.error("Error analyzing post: %s", e, exc_info=True)
            return (f"Error during analysis: {str(e)}", "", "", "", "0")

    def _format_analytics_summary(self, content_type, link, data, analytics, total_comments) -> str:
        """Format the collected data and analytics into the text summary.

        ``total_comments`` must be non-zero; it is used as the divisor for the
        percentage lines.
        """
        active_users = chr(10).join(
            f"- {user}: {count} комментариев" for user, count in analytics['active_users']
        )
        top_mentions = chr(10).join(
            f"- {user}: {count} упоминаний" for user, count in analytics['top_mentions'] if user
        )
        liked_pct = sum(1 for l in data['likes'] if l > 0) / total_comments * 100
        emoji_pct = sum(1 for c in data['comments'] if self.count_emojis(c) > 0) / total_comments * 100
        # The literal below is flush-left on purpose: its lines are emitted verbatim.
        return f"""
Content Type: {content_type}
Link to Post: {link}
ОСНОВНАЯ СТАТИСТИКА:
- Всего комментариев: {total_comments}
- Всего лайков на комментариях: {analytics['total_likes']}
- Среднее количество лайков: {analytics['avg_likes']:.1f}
- Период активности: {analytics['weeks_range'][0]}-{analytics['weeks_range'][1]} недель
АНАЛИЗ КОНТЕНТА:
- Средняя длина комментария: {analytics['avg_length']:.1f} символов
- Всего эмодзи использовано: {data['emojis']}
- Тональность комментариев:
* Позитивных: {analytics['sentiment_dist']['positive']}
* Нейтральных: {analytics['sentiment_dist']['neutral']}
* Негативных: {analytics['sentiment_dist']['negative']}
АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:
Самые активные комментаторы:
{active_users}
Самые упоминаемые пользователи:
{top_mentions}
ВОВЛЕЧЕННОСТЬ:
- Процент комментариев с лайками: {liked_pct:.1f}%
- Процент комментариев с эмодзи: {emoji_pct:.1f}%
"""
def create_interface():
    """Build the Gradio interface bound to a new ``CommentAnalyzer``.

    The seven input widgets are listed in the same order as the parameters
    of ``CommentAnalyzer.analyze_post``; the five output textboxes are listed
    in the same order as its returned tuple.
    """
    comment_analyzer = CommentAnalyzer()

    # Input widgets, in analyze_post parameter order.
    input_widgets = [
        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
        gr.Textbox(label="Link to Post", placeholder="Введите ссылку на пост"),
        gr.Number(label="Likes", value=0),
        gr.Textbox(label="Post Date", placeholder="Введите дату публикации"),
        gr.Textbox(label="Description", placeholder="Введите описание поста", lines=3),
        gr.Number(label="Total Comment Count", value=0),
        gr.Textbox(label="All Comments", placeholder="Вставьте комментарии", lines=10),
    ]

    # Output widgets, in analyze_post return order.
    output_widgets = [
        gr.Textbox(label="Analytics Summary", lines=20),
        gr.Textbox(label="Usernames (Output 1)", lines=5),
        gr.Textbox(label="Comments (Output 2)", lines=5),
        gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
        gr.Textbox(label="Total Likes on Comments (Output 4)"),
    ]

    return gr.Interface(
        fn=comment_analyzer.analyze_post,
        inputs=input_widgets,
        outputs=output_widgets,
        title="Instagram Comment Analyzer Pro",
        description="Расширенный анализатор комментариев Instagram с детальной аналитикой",
    )
if __name__ == "__main__":
    # Build the interface and launch it only when the file is run as a script.
    iface = create_interface()
    iface.launch()