import logging
import re
from collections import Counter
from datetime import datetime
from typing import Tuple, List, Optional

import emoji
import gradio as gr
from transformers import pipeline
|
|
|
|
|
# Configure the root logger so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)

# Module-scoped logger used by the analyzer for error and warning reports.
logger = logging.getLogger(__name__)
|
|
|
class CommentAnalyzer:
    """Parse pasted Instagram comment text and compute engagement analytics.

    The raw input is expected to be a run of comment blocks, each introduced
    by the marker ``Фото профиля`` ("Profile photo"). Every block is matched
    against ``comment_pattern`` to pull out the username, the comment text,
    an optional like count and a week number.
    """

    # Patterns live on the class so they are compiled once and are available
    # without loading the sentiment model.
    mention_pattern = re.compile(r'@[\w\.]+')

    # Groups: 1 = username, 2 = comment text, 3 = optional like count,
    # 4 = week number (digits followed by "н" or "w").
    # NOTE(review): the trailing "н" alternative is not anchored, so it also
    # matches the first letter of "нравится"; a block that ends in
    # "<N> нравится" with no week marker would report N as the week number.
    # Confirm against real pasted input before tightening the pattern.
    comment_pattern = re.compile(
        r'Фото профиля\s+(.+?)\s+'
        r'((?:(?!Фото профиля).)+?)\s+'
        r'(\d+)?\s*(?:нравится|like[s]?)?\s*'
        r'(\d+)\s*(?:н|w)',
        re.DOTALL,
    )

    # Model label -> label used in the analytics output.
    _SENTIMENT_LABELS = {'POSITIVE': 'positive', 'NEGATIVE': 'negative'}

    def __init__(self):
        """Load the default ``sentiment-analysis`` pipeline.

        Raises:
            Exception: whatever ``pipeline`` raised, re-raised unchanged after
                being logged.
        """
        try:
            self.sentiment_model = pipeline("sentiment-analysis")
        except Exception as e:
            logger.error("Failed to load sentiment model: %s", e)
            raise

    def clean_text(self, text: str) -> str:
        """Collapse every run of whitespace (including newlines) to one space."""
        return ' '.join(text.split())

    def count_emojis(self, text: str) -> int:
        """Return the number of emoji in ``text``.

        Uses ``emoji.emoji_count`` so that a multi-code-point emoji (ZWJ
        sequence, skin-tone variant, flag, keycap) counts as one emoji rather
        than being counted per code point or missed entirely.
        """
        return emoji.emoji_count(text)

    def extract_mentions(self, text: str) -> List[str]:
        """Return every ``@mention`` found in ``text``, in order of appearance."""
        return self.mention_pattern.findall(text)

    def analyze_sentiment(self, text: str) -> str:
        """Classify ``text`` as ``'positive'``, ``'negative'`` or ``'neutral'``.

        Any label other than ``POSITIVE``/``NEGATIVE`` maps to ``'neutral'``.
        If the model call fails, a warning is logged and ``'neutral'`` is
        returned.
        """
        try:
            # The character slice bounds the work; truncation=True additionally
            # caps the *token* count, which a 512-character slice alone does
            # not guarantee.
            result = self.sentiment_model(text[:512], truncation=True)
            label = result[0]['label']
        except Exception as e:
            logger.warning("Sentiment analysis failed: %s", e)
            return 'neutral'
        return self._SENTIMENT_LABELS.get(label, 'neutral')

    def extract_comment_data(self, comment_block: str) -> Tuple[Optional[str], Optional[str], int, int]:
        """Extract structured data from one comment block.

        Returns:
            ``(username, comment_text, likes_count, week_number)``. When the
            block does not match ``comment_pattern`` the result is
            ``(None, None, 0, 0)``. A missing like count is reported as 0.
        """
        match = self.comment_pattern.search(comment_block)
        if not match:
            return None, None, 0, 0

        username, comment, likes, week = match.groups()
        return (
            username.strip(),
            self.clean_text(comment),
            int(likes or 0),
            int(week or 0),
        )

    def analyze_post(self,
                     content_type: str,
                     link_to_post: str,
                     post_likes: int,
                     post_date: str,
                     description: str,
                     comment_count: int,
                     all_comments: str) -> Tuple[str, str, str, str, str]:
        """Analyze the comments of one post and build the analytics outputs.

        Args:
            content_type: Type of content ("Photo" or "Video"); echoed in the
                summary.
            link_to_post: URL of the post; echoed in the summary.
            post_likes: Number of likes on the post (accepted, not used here).
            post_date: Date of publication (accepted, not used here).
            description: Post caption (accepted, not used here).
            comment_count: Total number of comments (accepted, not used here).
            all_comments: Raw text containing all comment blocks.

        Returns:
            A 5-tuple of strings:
            - analytics summary
            - usernames, one per line
            - comments, one per line
            - per-comment like counts, one per line, in input order
            - total likes across the parsed comments

            If anything fails (including "no valid comments"), the error is
            logged and ``("Error during analysis: ...", "", "", "", "0")`` is
            returned instead of raising.
        """
        try:
            # Zero-width split keeps the marker at the start of each block.
            comments_blocks = [
                block
                for block in re.split(r'(?=Фото профиля)', all_comments)
                if block.strip()
            ]

            data = {
                'usernames': [],
                'comments': [],
                'likes': [],
                'weeks': [],
                'emojis': 0,
                'mentions': [],
                'sentiments': [],
                'lengths': [],
            }

            for block in comments_blocks:
                username, comment, like_count, week = self.extract_comment_data(block)
                # Skip blocks that did not parse or have an empty field.
                if not (username and comment):
                    continue

                data['usernames'].append(username)
                data['comments'].append(comment)
                data['likes'].append(like_count)
                data['weeks'].append(week)

                data['emojis'] += self.count_emojis(comment)
                data['mentions'].extend(self.extract_mentions(comment))
                data['sentiments'].append(self.analyze_sentiment(comment))
                data['lengths'].append(len(comment))

            total_comments = len(data['comments'])
            if total_comments == 0:
                raise ValueError("No valid comments found in input")

            total_likes = sum(data['likes'])
            analytics = {
                'avg_length': sum(data['lengths']) / total_comments,
                'sentiment_dist': Counter(data['sentiments']),
                'active_users': Counter(data['usernames']).most_common(5),
                'top_mentions': Counter(data['mentions']).most_common(5),
                'avg_likes': total_likes / total_comments,
                'weeks_range': (min(data['weeks']), max(data['weeks'])),
                'total_likes': total_likes,
            }

            summary = self._format_analytics_summary(
                content_type, link_to_post, data, analytics, total_comments
            )

            return (
                summary,
                '\n'.join(data['usernames']),
                '\n'.join(data['comments']),
                '\n'.join(map(str, data['likes'])),
                str(analytics['total_likes']),
            )

        except Exception as e:
            # UI boundary: report the failure in the first output instead of
            # letting the exception propagate to the caller.
            logger.error("Error analyzing post: %s", e, exc_info=True)
            return (f"Error during analysis: {str(e)}", "", "", "", "0")

    def _format_analytics_summary(self, content_type, link, data, analytics, total_comments):
        """Render the collected statistics as the multi-line summary text.

        ``total_comments`` must be non-zero; it is used as a divisor for the
        engagement percentages. Lines are emitted flush-left, with a leading
        and a trailing newline.
        """
        sentiment_dist = analytics['sentiment_dist']
        first_week, last_week = analytics['weeks_range']

        active_users = "\n".join(
            f"- {user}: {count} комментариев"
            for user, count in analytics['active_users']
        )
        top_mentions = "\n".join(
            f"- {user}: {count} упоминаний"
            for user, count in analytics['top_mentions']
            if user
        )

        liked_pct = sum(1 for n in data['likes'] if n > 0) / total_comments * 100
        emoji_pct = (
            sum(1 for c in data['comments'] if self.count_emojis(c) > 0)
            / total_comments * 100
        )

        lines = [
            "",
            f"Content Type: {content_type}",
            f"Link to Post: {link}",
            "",
            "ОСНОВНАЯ СТАТИСТИКА:",
            f"- Всего комментариев: {total_comments}",
            f"- Всего лайков на комментариях: {analytics['total_likes']}",
            f"- Среднее количество лайков: {analytics['avg_likes']:.1f}",
            f"- Период активности: {first_week}-{last_week} недель",
            "",
            "АНАЛИЗ КОНТЕНТА:",
            f"- Средняя длина комментария: {analytics['avg_length']:.1f} символов",
            f"- Всего эмодзи использовано: {data['emojis']}",
            "- Тональность комментариев:",
            f"* Позитивных: {sentiment_dist['positive']}",
            f"* Нейтральных: {sentiment_dist['neutral']}",
            f"* Негативных: {sentiment_dist['negative']}",
            "",
            "АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:",
            "Самые активные комментаторы:",
            active_users,
            "",
            "Самые упоминаемые пользователи:",
            top_mentions,
            "",
            "ВОВЛЕЧЕННОСТЬ:",
            f"- Процент комментариев с лайками: {liked_pct:.1f}%",
            f"- Процент комментариев с эмодзи: {emoji_pct:.1f}%",
            "",
        ]
        return "\n".join(lines)
|
|
|
def create_interface():
    """Build the Gradio interface around a freshly created ``CommentAnalyzer``.

    The seven input components are passed positionally to
    ``CommentAnalyzer.analyze_post`` and the five output components receive
    its returned tuple, in the same order.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    comment_analyzer = CommentAnalyzer()

    # Order matters: it must match the parameter order of analyze_post.
    input_components = [
        gr.Radio(
            choices=["Photo", "Video"],
            label="Content Type",
            value="Photo",
        ),
        gr.Textbox(
            label="Link to Post",
            placeholder="Введите ссылку на пост",
        ),
        gr.Number(
            label="Likes",
            value=0,
        ),
        gr.Textbox(
            label="Post Date",
            placeholder="Введите дату публикации",
        ),
        gr.Textbox(
            label="Description",
            placeholder="Введите описание поста",
            lines=3,
        ),
        gr.Number(
            label="Total Comment Count",
            value=0,
        ),
        gr.Textbox(
            label="All Comments",
            placeholder="Вставьте комментарии",
            lines=10,
        ),
    ]

    # Order matters: it must match the tuple returned by analyze_post.
    output_components = [
        gr.Textbox(label="Analytics Summary", lines=20),
        gr.Textbox(label="Usernames (Output 1)", lines=5),
        gr.Textbox(label="Comments (Output 2)", lines=5),
        gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
        gr.Textbox(label="Total Likes on Comments (Output 4)"),
    ]

    return gr.Interface(
        fn=comment_analyzer.analyze_post,
        inputs=input_components,
        outputs=output_components,
        title="Instagram Comment Analyzer Pro",
        description="Расширенный анализатор комментариев Instagram с детальной аналитикой",
    )
|
|
|
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    create_interface().launch()