|
import gradio as gr |
|
import re |
|
import emoji |
|
import logging |
|
from typing import Tuple, Optional |
|
from functools import lru_cache |
|
from collections import Counter |
|
|
|
# Module-level logger used by the parsing and analysis functions below.
logger = logging.getLogger(__name__)

# Configure the root logger so that INFO and more severe records are emitted.
logging.basicConfig(level=logging.INFO)
|
|
|
def count_emojis(text: str) -> int:
    """Return the number of emoji contained in ``text``.

    Counting is delegated to ``emoji.emoji_count`` so that an emoji made of
    several code points (a heart followed by a variation selector, a flag,
    a ZWJ sequence) is counted once. Checking every single character
    against ``emoji.EMOJI_DATA`` miscounts such sequences.
    """
    return emoji.emoji_count(text)
|
|
|
def extract_mentions(text: str) -> list:
    """Return the user names mentioned in ``text``, without the leading ``@``.

    A name is a run of word characters that may contain single dots between
    word characters, so ``@john.doe`` yields ``john.doe`` instead of being
    cut at the first dot. A dot that is not followed by a word character
    (sentence punctuation, as in ``thanks @anna.``) is not part of the name.

    Returns an empty list when ``text`` contains no mention.
    """
    return re.findall(r'@(\w+(?:\.\w+)*)', text)
|
|
|
def is_spam(text: str) -> bool:
    """Tell whether ``text`` contains one of the known spam markers.

    The markers are three consecutive fire emoji, three consecutive heart
    emoji, and the phrases "follow me" and "check my". The text is
    lower-cased before the comparison, so the phrases match in any case.
    """
    lowered = text.lower()
    for marker in ('🔥' * 3, '❤️' * 3, 'follow me', 'check my'):
        if marker in lowered:
            return True
    return False
|
|
|
# Trailing relative age of a comment, e.g. "5 ч." or "2 нед.".
_TIME_SUFFIX_RE = re.compile(r'\d+\s*(?:ч\.|нед\.)\s*$')
# Trailing like counter followed by the reply link, e.g. '"Нравится": 3Ответить'.
_LIKES_SUFFIX_RE = re.compile(r'"Нравится":\s*\d+\s*Ответить\s*$')


def _strip_comment_footer(comment: str) -> str:
    """Remove the trailing age / like-counter footer from a comment line."""
    comment = _TIME_SUFFIX_RE.sub('', comment)
    without_likes = _LIKES_SUFFIX_RE.sub('', comment)
    if without_likes != comment:
        # The like counter was the last thing on the line, so an age stamp
        # placed in front of it ('2 нед."Нравится": 3Ответить') only becomes
        # a suffix now. Strip it as well so it does not leak into the text.
        without_likes = _TIME_SUFFIX_RE.sub('', without_likes)
    return without_likes


def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
    """Extract the author, text, like count and age from one comment block.

    Args:
        comment_text: One block of pasted text that is expected to contain a
            "Фото профиля <username>" header line.

    Returns:
        A tuple ``(username, comment, likes, time)``:

        * ``username`` - the text following "Фото профиля" on the header line.
        * ``comment`` - the line that follows the first line containing the
          user name, with the trailing age / like-counter footer removed.
          Empty when there is no such line.
        * ``likes`` - the number following ``"Нравится":`` anywhere in the
          block, or 0 when absent.
        * ``time`` - the number preceding "ч." or "нед." anywhere in the
          block, or 0 when absent. The unit itself is not returned.

        ``(None, None, 0, 0)`` is returned when the block contains
        'отметок "Нравится"', when it has no "Фото профиля" header, or when
        any exception is raised while parsing (the error is logged).
    """
    try:
        # Blocks that contain this phrase are skipped entirely.
        if 'отметок "Нравится"' in comment_text:
            return None, None, 0, 0

        username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
        if not username_match:
            return None, None, 0, 0
        username = username_match.group(1).strip()

        # The comment is taken from the line right after the first line that
        # mentions the user name.
        lines = comment_text.split('\n')
        comment = ""
        for i, line in enumerate(lines):
            if username in line and i + 1 < len(lines):
                comment = _strip_comment_footer(lines[i + 1].strip())
                break

        likes_match = re.search(r'"Нравится":\s*(\d+)', comment_text)
        likes = int(likes_match.group(1)) if likes_match else 0

        time_match = re.search(r'(\d+)\s*(?:ч\.|нед\.)', comment_text)
        time = int(time_match.group(1)) if time_match else 0

        return username, comment.strip(), likes, time

    except Exception as e:
        logger.error(f"Error extracting data: {e}")
        return None, None, 0, 0
|
|
|
@lru_cache(maxsize=100)
def analyze_post(content_type: str, link: str, post_likes: int,
                 post_date: str, description: str, comment_count: int,
                 all_comments: str) -> Tuple[str, str, str, str, str]:
    """Parse a pasted comment dump and build a textual summary of it.

    Only ``all_comments`` is read by the body; the other parameters are
    accepted but unused here. Results are memoised by ``lru_cache`` (up to
    100 distinct argument combinations).

    Args:
        all_comments: Raw text in which every comment starts with a
            "Фото профиля" marker.

    Returns:
        A 5-tuple of strings: the analytics report, the user names (one per
        line), the comment texts (one per line), the per-comment like counts
        (one per line) and the total number of likes. When the input is
        empty or lacks the marker, the first element is an error message and
        the total is "0". When an exception occurs it is logged and its text
        is returned as the first element.
    """
    try:
        if not all_comments or 'Фото профиля' not in all_comments:
            return "Ошибка: неверный формат данных", "", "", "", "0"

        records = []
        all_mentions = []
        # Zero-width split: every piece starts at a "Фото профиля" marker.
        for piece in re.split(r'(?=Фото профиля)', all_comments):
            piece = piece.strip()
            if not piece:
                continue

            username, comment, likes, time = extract_comment_data(piece)
            # Blocks without a user name or without comment text are dropped.
            if not (username and comment):
                continue

            emoji_count = count_emojis(comment)
            found_mentions = extract_mentions(comment)
            records.append({
                'username': username,
                'comment': comment,
                'likes': likes,
                'time': time,
                'emoji_count': emoji_count,
                'mentions': found_mentions,
                'is_spam': is_spam(comment),
            })
            all_mentions.extend(found_mentions)

        # Aggregates over the accepted comments.
        total_comments = len(records)
        per_user = Counter(record['username'] for record in records)
        unique_users = len(per_user)
        total_likes = sum(record['likes'] for record in records)
        avg_likes = total_likes / total_comments if total_comments > 0 else 0
        total_emojis = sum(record['emoji_count'] for record in records)
        spam_count = sum(1 for record in records if record['is_spam'])

        # Only users with more than one comment among the five most active
        # ones are listed.
        top_lines = "\n".join(
            f'• {user}: {count} комментария'
            for user, count in per_user.most_common(5)
            if count > 1
        )

        # The empty first and last items give the report its leading and
        # trailing newline.
        analytics = "\n".join([
            "",
            "📊 Подробный анализ комментариев:",
            "",
            "Основные метрики:",
            f"• Всего комментариев: {total_comments}",
            f"• Уникальных пользователей: {unique_users}",
            f"• Общее количество лайков: {total_likes}",
            f"• Среднее количество лайков: {avg_likes:.1f}",
            "",
            "Дополнительная информация:",
            f"• Использовано эмодзи: {total_emojis}",
            f"• Количество упоминаний: {len(all_mentions)}",
            f"• Выявлено спам-комментариев: {spam_count}",
            "",
            "Топ комментаторы:",
            top_lines,
            "",
        ])

        usernames_text = "\n".join(record['username'] for record in records)
        comments_text = "\n".join(record['comment'] for record in records)
        likes_text = "\n".join(str(record['likes']) for record in records)
        return analytics, usernames_text, comments_text, likes_text, str(total_likes)

    except Exception as e:
        logger.error(f"Analysis error: {e}")
        return str(e), "", "", "", "0"
|
|
|
|
|
# Gradio front end for analyze_post. The seven inputs are passed to the
# function positionally, so their order mirrors its parameter list; the five
# outputs receive the elements of the returned tuple in order.
iface = gr.Interface(
    fn=analyze_post,
    inputs=[
        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
        gr.Textbox(label="Link to Post", placeholder="Вставьте ссылку на пост"),
        gr.Number(label="Likes", value=0, minimum=0),
        gr.Textbox(label="Post Date", placeholder="YYYY-MM-DD"),
        gr.Textbox(label="Description", lines=3, placeholder="Описание поста"),
        gr.Number(label="Comment Count", value=0, minimum=0),
        gr.Textbox(label="Comments", lines=10, placeholder="Вставьте комментарии"),
    ],
    outputs=[
        gr.Textbox(label="Analytics Summary", lines=15),
        gr.Textbox(label="Usernames"),
        gr.Textbox(label="Comments"),
        gr.Textbox(label="Likes Chronology"),
        gr.Textbox(label="Total Likes on Comments"),
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="""
Анализатор комментариев Instagram с расширенной аналитикой.
Возможности:
• Анализ комментариев и лайков
• Подсчет эмодзи и упоминаний
• Определение спам-комментариев
• Статистика по пользователям
""",
    theme="default",
    # NOTE(review): no component above assigns the .output-text or
    # .analytics-summary classes - confirm these rules are still needed.
    css="""
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.output-text {
    white-space: pre-wrap;
}
.analytics-summary {
    background-color: #f5f5f5;
    padding: 15px;
    border-radius: 8px;
}
"""
)
|
|
|
if __name__ == "__main__":
    # Start the web UI when the file is run as a script. Launch failures are
    # logged with their traceback instead of propagating.
    try:
        # Queuing is switched on through queue(). The ``enable_queue``
        # keyword of launch() is deprecated in Gradio 3 and not accepted by
        # Gradio 4, where passing it makes launch() raise before the app
        # starts.
        iface.queue()
        iface.launch(
            share=True,
            debug=True,
            show_error=True,
        )
    except Exception as e:
        logger.error(f"Error launching interface: {e}", exc_info=True)