new-space / app.py
boompack's picture
Update app.py
e5c8ff6 verified
raw
history blame
9.98 kB
import gradio as gr
import re
from collections import Counter
from datetime import datetime
import emoji
from transformers import pipeline
import logging
from typing import Tuple, List, Optional
# Configure the root logger to emit INFO-and-above records, then create the
# module-level logger that the rest of this file writes to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class CommentAnalyzer:
    """Parse pasted Instagram comment text and compute engagement analytics.

    The raw input is expected to be a sequence of comment blocks, each one
    introduced by the marker text ``Фото профиля``.  Every block is parsed
    with ``comment_pattern`` into a username, comment text, likes count and
    week number, and the parsed comments are aggregated into a text summary.
    """

    # Patterns are compiled once at class level: they do not depend on
    # instance state, and this keeps the parsing helpers usable even when the
    # sentiment model has not been loaded.
    mention_pattern = re.compile(r'@[\w\.]+')
    comment_pattern = re.compile(
        r'Фото профиля\s+(.+?)\s+'  # Username
        r'((?:(?!Фото профиля).)+?)\s+'  # Comment text
        r'(\d+)?\s*(?:нравится|like[s]?)?\s*'  # Likes count
        # The lookbehind stops the optional likes group from stealing the
        # leading digits of the week number (e.g. "12 н" must not be parsed
        # as likes=1, week=2).
        r'(?<!\d)(\d+)\s*(?:н|w)',  # Week number
        re.DOTALL,
    )
    # Zero-width split point in front of every comment marker.
    _block_split_pattern = re.compile(r'(?=Фото профиля)')
    # Maps the labels checked for in the model output to the summary buckets;
    # any other label is reported as 'neutral'.
    _SENTIMENT_LABELS = {'POSITIVE': 'positive', 'NEGATIVE': 'negative'}

    def __init__(self):
        """Load the sentiment-analysis pipeline.

        Raises:
            Exception: whatever ``pipeline`` raises when the model cannot be
                loaded; the failure is logged and re-raised unchanged.
        """
        try:
            self.sentiment_model = pipeline("sentiment-analysis")
        except Exception as e:
            logger.error("Failed to load sentiment model: %s", e)
            raise

    def clean_text(self, text: str) -> str:
        """Collapse every run of whitespace (including newlines) to one space."""
        return ' '.join(text.split())

    def count_emojis(self, text: str) -> int:
        """Return how many characters of *text* are keys of ``emoji.EMOJI_DATA``.

        Counting is per character, so an emoji made of several code points
        may contribute more than one to the total.
        """
        return sum(1 for c in text if c in emoji.EMOJI_DATA)

    def extract_mentions(self, text: str) -> List[str]:
        """Return every ``@mention`` found in *text*, in order of appearance."""
        return self.mention_pattern.findall(text)

    def analyze_sentiment(self, text: str) -> str:
        """Classify *text* as ``'positive'``, ``'negative'`` or ``'neutral'``.

        Any failure of the model is logged and reported as ``'neutral'`` so a
        single bad comment cannot abort the whole analysis.
        """
        try:
            # The character slice only bounds the input size; truncation=True
            # additionally caps the *token* count, which the slice alone does
            # not guarantee.
            result = self.sentiment_model(text[:512], truncation=True)
            label = result[0]['label']
        except Exception as e:
            logger.warning("Sentiment analysis failed: %s", e)
            return 'neutral'
        return self._SENTIMENT_LABELS.get(label, 'neutral')

    def extract_comment_data(self, comment_block: str) -> Tuple[Optional[str], Optional[str], int, int]:
        """Extract structured data from one comment block.

        Args:
            comment_block: Raw text of a single comment, starting at its
                ``Фото профиля`` marker.

        Returns:
            ``(username, comment_text, likes_count, week_number)``.  When the
            block does not match ``comment_pattern`` the result is
            ``(None, None, 0, 0)``.  A missing likes count is reported as 0.
        """
        match = self.comment_pattern.search(comment_block)
        if not match:
            return None, None, 0, 0
        username, comment, likes, week = match.groups()
        return (
            username.strip(),
            self.clean_text(comment),
            int(likes or 0),
            int(week or 0),
        )

    def analyze_post(self,
                     content_type: str,
                     link_to_post: str,
                     post_likes: int,
                     post_date: str,
                     description: str,
                     comment_count: int,
                     all_comments: str) -> Tuple[str, str, str, str, str]:
        """Analyze pasted post comments and build the analytics outputs.

        Args:
            content_type: Type of content ("Photo" or "Video"); echoed in the
                summary.
            link_to_post: URL of the post; echoed in the summary.
            post_likes: Number of likes on the post (accepted but not used).
            post_date: Date of post publication (accepted but not used).
            description: Post description/caption (accepted but not used).
            comment_count: Total number of comments (accepted but not used).
            all_comments: Raw text containing all comments.

        Returns:
            A 5-tuple of strings:
              - analytics summary,
              - usernames, one per line,
              - comments, one per line,
              - per-comment likes, one per line, in input order,
              - total likes over all parsed comments.
            On any failure the first element is an error message, the next
            three are empty strings and the last one is ``"0"``.
        """
        try:
            # Split in front of every marker; blank fragments are dropped.
            # ``or ""`` lets a missing value fall through to the
            # "no valid comments" result instead of a TypeError.
            comments_blocks = [
                block
                for block in self._block_split_pattern.split(all_comments or "")
                if block.strip()
            ]

            data = {
                'usernames': [],
                'comments': [],
                'likes': [],
                'weeks': [],
                'emojis': 0,
                'mentions': [],
                'sentiments': [],
                'lengths': [],
            }

            for block in comments_blocks:
                username, comment, like_count, week = self.extract_comment_data(block)
                # Blocks that did not parse (or parsed to empty fields) are skipped.
                if not (username and comment):
                    continue
                data['usernames'].append(username)
                data['comments'].append(comment)
                data['likes'].append(like_count)
                data['weeks'].append(week)
                data['emojis'] += self.count_emojis(comment)
                data['mentions'].extend(self.extract_mentions(comment))
                data['sentiments'].append(self.analyze_sentiment(comment))
                data['lengths'].append(len(comment))

            total_comments = len(data['comments'])
            if total_comments == 0:
                # Expected for empty or unparseable input: report it without
                # a traceback.
                message = "No valid comments found in input"
                logger.warning("Error analyzing post: %s", message)
                return (f"Error during analysis: {message}", "", "", "", "0")

            total_likes = sum(data['likes'])
            analytics = {
                'avg_length': sum(data['lengths']) / total_comments,
                'sentiment_dist': Counter(data['sentiments']),
                'active_users': Counter(data['usernames']).most_common(5),
                'top_mentions': Counter(data['mentions']).most_common(5),
                'avg_likes': total_likes / total_comments,
                'weeks_range': (min(data['weeks']), max(data['weeks'])),
                'total_likes': total_likes,
            }

            summary = self._format_analytics_summary(
                content_type, link_to_post, data, analytics, total_comments
            )
            return (
                summary,
                '\n'.join(data['usernames']),
                '\n'.join(data['comments']),
                '\n'.join(map(str, data['likes'])),
                str(analytics['total_likes']),
            )
        except Exception as e:
            # Top-level boundary for the UI callback: log with traceback and
            # return an error tuple instead of propagating.
            logger.exception("Error analyzing post: %s", e)
            return (f"Error during analysis: {str(e)}", "", "", "", "0")

    def _format_analytics_summary(self, content_type, link, data, analytics, total_comments) -> str:
        """Render the collected metrics as the multi-line summary text.

        Args:
            content_type: Content type label echoed in the header.
            link: Post URL echoed in the header.
            data: Per-comment containers built by ``analyze_post``
                (reads ``'likes'``, ``'comments'`` and ``'emojis'``).
            analytics: Aggregates built by ``analyze_post``.
            total_comments: Number of parsed comments; must be non-zero
                because it is used as a divisor.

        Returns:
            The summary text, starting and ending with a newline.
        """
        active_users = "\n".join(
            f"- {user}: {count} комментариев"
            for user, count in analytics['active_users']
        )
        top_mentions = "\n".join(
            f"- {user}: {count} упоминаний"
            for user, count in analytics['top_mentions']
            if user
        )
        liked_share = sum(1 for l in data['likes'] if l > 0) / total_comments * 100
        emoji_share = (
            sum(1 for c in data['comments'] if self.count_emojis(c) > 0)
            / total_comments * 100
        )
        sentiment = analytics['sentiment_dist']
        weeks_from, weeks_to = analytics['weeks_range'][0], analytics['weeks_range'][1]

        lines = [
            f"Content Type: {content_type}",
            f"Link to Post: {link}",
            "ОСНОВНАЯ СТАТИСТИКА:",
            f"- Всего комментариев: {total_comments}",
            f"- Всего лайков на комментариях: {analytics['total_likes']}",
            f"- Среднее количество лайков: {analytics['avg_likes']:.1f}",
            f"- Период активности: {weeks_from}-{weeks_to} недель",
            "АНАЛИЗ КОНТЕНТА:",
            f"- Средняя длина комментария: {analytics['avg_length']:.1f} символов",
            f"- Всего эмодзи использовано: {data['emojis']}",
            "- Тональность комментариев:",
            f"* Позитивных: {sentiment['positive']}",
            f"* Нейтральных: {sentiment['neutral']}",
            f"* Негативных: {sentiment['negative']}",
            "АКТИВНОСТЬ ПОЛЬЗОВАТЕЛЕЙ:",
            "Самые активные комментаторы:",
            active_users,
            "Самые упоминаемые пользователи:",
            top_mentions,
            "ВОВЛЕЧЕННОСТЬ:",
            f"- Процент комментариев с лайками: {liked_share:.1f}%",
            f"- Процент комментариев с эмодзи: {emoji_share:.1f}%",
        ]
        # Leading and trailing empty items reproduce the newline that opens
        # and closes the summary text.
        return "\n".join(["", *lines, ""])
def create_interface():
    """Build the Gradio interface that wraps ``CommentAnalyzer.analyze_post``.

    A single ``CommentAnalyzer`` is created up front and its ``analyze_post``
    method is used as the interface callback.

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """
    analyzer = CommentAnalyzer()

    # Input widgets, in the positional order of analyze_post's parameters.
    input_components = [
        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
        gr.Textbox(label="Link to Post", placeholder="Введите ссылку на пост"),
        gr.Number(label="Likes", value=0),
        gr.Textbox(label="Post Date", placeholder="Введите дату публикации"),
        gr.Textbox(label="Description", placeholder="Введите описание поста", lines=3),
        gr.Number(label="Total Comment Count", value=0),
        gr.Textbox(label="All Comments", placeholder="Вставьте комментарии", lines=10),
    ]

    # Output widgets, in the order of the tuple returned by analyze_post.
    output_components = [
        gr.Textbox(label="Analytics Summary", lines=20),
        gr.Textbox(label="Usernames (Output 1)", lines=5),
        gr.Textbox(label="Comments (Output 2)", lines=5),
        gr.Textbox(label="Likes Chronology (Output 3)", lines=5),
        gr.Textbox(label="Total Likes on Comments (Output 4)"),
    ]

    return gr.Interface(
        fn=analyzer.analyze_post,
        inputs=input_components,
        outputs=output_components,
        title="Instagram Comment Analyzer Pro",
        description="Расширенный анализатор комментариев Instagram с детальной аналитикой",
    )
if __name__ == "__main__":
    # Script entry point: build the interface once and start serving it.
    create_interface().launch()