new-space / app.py
boompack's picture
Update app.py
bf4724c verified
raw
history blame
7.57 kB
import gradio as gr
import re
import emoji
import logging
from typing import Tuple, Optional
from functools import lru_cache
from collections import Counter
# Configure the root logger to emit INFO-level (and above) records, then
# create the logger used by every function in this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def count_emojis(text: str) -> int:
    """Count the emoji contained in ``text``.

    The count is delegated to ``emoji.emoji_count`` so that an emoji made of
    several code points (ZWJ sequences, skin-tone modifiers, flags) is counted
    as one emoji instead of being tested code point by code point.

    Args:
        text: The text to scan. ``None`` or an empty string yields 0.

    Returns:
        The number of emoji found in ``text`` (repeats are counted each time).
    """
    if not text:
        return 0
    return emoji.emoji_count(text)
def extract_mentions(text: str) -> list:
    """Return the user names mentioned with ``@`` in ``text``.

    A mention is ``@`` followed by word characters, optionally with single
    periods between word-character runs (``@john.doe``). A trailing period
    that merely ends a sentence is not included in the name. An ``@`` that is
    directly preceded by a word character is skipped so that e-mail addresses
    such as ``user@example.com`` are not reported as mentions.

    Args:
        text: The text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        The mentioned names, without the leading ``@``, in order of
        appearance (duplicates are kept).
    """
    if not text:
        return []
    # (?<!\w)            -> the "@" must not continue a word (rules out e-mails)
    # \w+(?:\.\w+)*      -> word runs joined by single dots; never ends on "."
    return re.findall(r'(?<!\w)@(\w+(?:\.\w+)*)', text)
def is_spam(text: str) -> bool:
    """Tell whether a comment looks like spam.

    The lower-cased text is searched for a handful of fixed markers: three
    fire emoji in a row, three heart emoji in a row, or the phrases
    ``follow me`` / ``check my``.

    Args:
        text: The comment text to inspect.

    Returns:
        ``True`` if any marker occurs in the text, ``False`` otherwise.
    """
    lowered = text.lower()
    for marker in ('🔥' * 3, '❤️' * 3, 'follow me', 'check my'):
        if marker in lowered:
            return True
    return False
def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
    """Parse one pasted comment block into its fields.

    The user name is read from the ``Фото профиля <username>`` line. The
    comment body is the single line that follows the first line containing a
    relative timestamp (``<n> ч.`` or ``<n> нед.``).

    Args:
        comment_text: Raw text of one comment block.

    Returns:
        A ``(username, comment, likes, time)`` tuple. ``likes`` is the number
        after the first ``"Нравится":`` marker and ``time`` is the number of
        the first timestamp; each is 0 when not present. The tuple
        ``(None, None, 0, 0)`` is returned when no user name is found or when
        parsing raises an exception (the error is logged).
    """
    nothing_found = (None, None, 0, 0)
    try:
        # User name: everything after the profile-photo marker up to the newline.
        header = re.search(r'Фото профиля\s+(.+?)\n', comment_text)
        username = header.group(1).strip() if header else None
        if not username:
            return nothing_found

        stamp = r'\d+\s*(?:ч\.|нед\.)'
        rows = comment_text.split('\n')

        # Index of the first timestamp row; the comment body sits right below it.
        stamp_row = next(
            (idx for idx, row in enumerate(rows) if re.search(stamp, row)),
            None,
        )
        body = ""
        if stamp_row is not None and stamp_row + 1 < len(rows):
            body = rows[stamp_row + 1].strip()

        # Drop a trailing timestamp, then a trailing like-counter/"reply" tail.
        for tail in (r'\d+\s*(?:ч\.|нед\.)\s*$',
                     r'"Нравится":\s*\d+\s*Ответить\s*$'):
            body = re.sub(tail, '', body)

        likes_hit = re.search(r'"Нравится":\s*(\d+)', comment_text)
        likes = int(likes_hit.group(1)) if likes_hit else 0

        stamp_hit = re.search(r'(\d+)\s*(?:ч\.|нед\.)', comment_text)
        age = int(stamp_hit.group(1)) if stamp_hit else 0

        return username, body.strip(), likes, age
    except Exception as e:
        logger.error(f"Error extracting data: {e}")
        return nothing_found
@lru_cache(maxsize=100)
def analyze_post(content_type: str, link: str, post_likes: int,
                 post_date: str, description: str, comment_count: int,
                 all_comments: str) -> Tuple[str, str, str, str, str]:
    """Analyse a pasted dump of post comments and build a text report.

    Only ``all_comments`` is read by the analysis; the remaining parameters
    are accepted for the UI form and otherwise unused (they still take part
    in the ``lru_cache`` key).

    Args:
        content_type: Post type selected in the form (unused).
        link: Link to the post (unused).
        post_likes: Like count of the post (unused).
        post_date: Post date (unused).
        description: Post description (unused).
        comment_count: Declared number of comments (unused).
        all_comments: Raw pasted comments; each comment block starts with
            ``Фото профиля``.

    Returns:
        A 5-tuple of strings: the analytics summary, newline-joined user
        names, newline-joined comment texts, newline-joined per-comment like
        counts, and the total number of likes on comments. On malformed input
        or on an exception the first element carries the error text and the
        last element is ``"0"``.
    """
    try:
        if not all_comments or 'Фото профиля' not in all_comments:
            return "Ошибка: неверный формат данных", "", "", "", "0"

        # Zero-width split: every chunk keeps its own "Фото профиля" prefix.
        chunks = (piece.strip() for piece in re.split(r'(?=Фото профиля)', all_comments))

        records = []
        for chunk in chunks:
            if not chunk:
                continue
            username, comment, likes, age = extract_comment_data(chunk)
            # Blocks without a user name or without comment text are ignored.
            if not (username and comment):
                continue
            records.append({
                'username': username,
                'comment': comment,
                'likes': likes,
                'time': age,
                'emoji_count': count_emojis(comment),
                'mentions': extract_mentions(comment),
                'is_spam': is_spam(comment)
            })

        # Aggregates derived from the collected records.
        total_emojis = sum(rec['emoji_count'] for rec in records)
        mentions = [name for rec in records for name in rec['mentions']]
        spam_count = sum(1 for rec in records if rec['is_spam'])

        total_comments = len(records)
        total_likes = sum(rec['likes'] for rec in records)
        avg_likes = total_likes / total_comments if total_comments > 0 else 0

        # Comments per user; the number of distinct keys is the unique-user count.
        commenter_counts = Counter(rec['username'] for rec in records)
        unique_users = len(commenter_counts)
        top_commenters = commenter_counts.most_common(5)

        analytics = f"""
📊 Подробный анализ комментариев:
Основные метрики:
• Всего комментариев: {total_comments}
• Уникальных пользователей: {unique_users}
• Общее количество лайков: {total_likes}
• Среднее количество лайков: {avg_likes:.1f}
Дополнительная информация:
• Использовано эмодзи: {total_emojis}
• Количество упоминаний: {len(mentions)}
• Выявлено спам-комментариев: {spam_count}
Топ комментаторы:
{chr(10).join(f'• {user}: {count} комментария' for user, count in top_commenters if count > 1)}
"""
        usernames_column = "\n".join(rec['username'] for rec in records)
        comments_column = "\n".join(rec['comment'] for rec in records)
        likes_column = "\n".join(str(rec['likes']) for rec in records)
        return analytics, usernames_column, comments_column, likes_column, str(total_likes)
    except Exception as e:
        logger.error(f"Analysis error: {e}")
        return str(e), "", "", "", "0"
# Build the Gradio interface: seven form inputs feed analyze_post, whose five
# returned strings are shown in the five output text boxes (same order).
iface = gr.Interface(
    fn=analyze_post,
    inputs=[
        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
        gr.Textbox(label="Link to Post", placeholder="Вставьте ссылку на пост"),
        gr.Number(label="Likes", value=0, minimum=0),
        gr.Textbox(label="Post Date", placeholder="YYYY-MM-DD"),
        gr.Textbox(label="Description", lines=3, placeholder="Описание поста"),
        gr.Number(label="Comment Count", value=0, minimum=0),
        gr.Textbox(label="Comments", lines=10, placeholder="Вставьте комментарии"),
    ],
    outputs=[
        gr.Textbox(label="Analytics Summary", lines=15),
        gr.Textbox(label="Usernames"),
        gr.Textbox(label="Comments"),
        gr.Textbox(label="Likes Chronology"),
        gr.Textbox(label="Total Likes on Comments"),
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Анализатор комментариев Instagram с расширенной аналитикой",
    theme="default",
)
if __name__ == "__main__":
    # Launch settings:
    #   share      - create a public link
    #   debug      - enable debug mode
    #   show_error - show error details
    launch_options = {"share": True, "debug": True, "show_error": True}
    try:
        iface.launch(**launch_options)
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception(f"Error launching interface: {e}")