Update app.py
app.py
@@ -1,532 +1,113 @@
 import re
-import emoji
-import statistics
-from collections import Counter
-from typing import Dict, List, Tuple, Optional, Set, Union
 import logging
-from
-from datetime import datetime
-import csv
-from dataclasses import dataclass, asdict
-from enum import Enum
-import numpy as np
 
-
-log_dir = Path("logs")
-log_dir.mkdir(exist_ok=True)
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler(log_dir / f'analyzer_{datetime.now():%Y%m%d}.log'),
-        logging.StreamHandler()
-    ]
-)
 logger = logging.getLogger(__name__)
 
-[... 7 lines not recovered: the code below references a Sentiment enum with POSITIVE, SLIGHTLY_POSITIVE, NEUTRAL, SLIGHTLY_NEGATIVE and NEGATIVE members ...]
-@dataclass
-class CommentData:
-    username: str
-    text: str
-    likes: int
-    weeks_ago: float
-    sentiment: Sentiment
-
-class TextAnalyzer:
-    """Enhanced text analysis utilities"""
-
-    @staticmethod
-    def clean_text(text: str) -> str:
-        """Clean text using more efficient string splitting"""
-        return ' '.join(text.split())
-
-    @staticmethod
-    def count_emojis(text: str) -> int:
-        """Count emojis using set operations for better performance"""
-        return len({c for c in text if c in emoji.EMOJI_DATA})
-
-    @staticmethod
-    def extract_mentions(text: str) -> Set[str]:
-        """Extract mentions returning a set for uniqueness"""
-        return set(re.findall(r'@[\w.]+', text))
-
-    @staticmethod
-    def get_words(text: str) -> List[str]:
-        """Extract meaningful words using improved regex"""
-        return [w for w in re.findall(r'\b\w{3,}\b', text.lower())]
-
-class SentimentAnalyzer:
-    """Enhanced sentiment analysis with gradual classification"""
-
-    # Using sets for O(1) lookup
-    INDICATORS = {
-        'positive': {
-            '🔥', '❤️', '👍', '😊', '💪', '👏', '🎉', '♥️', '😍', '🙏',
-            'круто', 'супер', 'класс', 'огонь', 'пушка', 'отлично', 'здорово',
-            'прекрасно', 'молодец', 'красота', 'спасибо', 'топ', 'лучший',
-            'amazing', 'wonderful', 'great', 'perfect', 'love', 'beautiful'
-        },
-        'negative': {
-            '👎', '😢', '😞', '😠', '😡', '💔', '😕', '😑',
-            'плохо', 'ужас', 'отстой', 'фу', 'жесть', 'ужасно',
-            'разочарован', 'печаль', 'грустно', 'bad', 'worst',
-            'terrible', 'awful', 'sad', 'disappointed'
-        }
-    }
-
-    @classmethod
-    def analyze(cls, text: str) -> Sentiment:
-        """
-        Analyze text sentiment with enhanced granularity and emphasis handling
-        """
-        text_lower = text.lower()
-        words = set(cls.TextAnalyzer.get_words(text_lower))
-
-        pos_count = len(words & cls.INDICATORS['positive'])
-        neg_count = len(words & cls.INDICATORS['negative'])
-
-        # Calculate emphasis multiplier based on punctuation
-        emphasis = min(text.count('!') * 0.2 + text.count('?') * 0.1, 1.0)
-
-        # Apply emphasis to the dominant sentiment
-        if pos_count > neg_count:
-            pos_count *= (1 + emphasis)
-        elif neg_count > pos_count:
-            neg_count *= (1 + emphasis)
-
-        # Determine sentiment with granularity
-        total = pos_count + neg_count
-        if total == 0:
-            return Sentiment.NEUTRAL
-
-        ratio = pos_count / total
-        if ratio > 0.8:
-            return Sentiment.POSITIVE
-        elif ratio > 0.6:
-            return Sentiment.SLIGHTLY_POSITIVE
-        elif ratio < 0.2:
-            return Sentiment.NEGATIVE
-        elif ratio < 0.4:
-            return Sentiment.SLIGHTLY_NEGATIVE
-        return Sentiment.NEUTRAL
 
-[... 27 lines not recovered: from later references, a CommentExtractor class with a ParseError exception and a PATTERNS dict holding 'username', 'time' (named groups value/unit) and 'likes' regexes, of which only the tail survives ...]
-            likes?:\s*(?P<count4>\d+)
-        )
-    """, re.VERBOSE),
-
-    'metadata': re.compile(r"""
-        Фото\sпрофиля[^\n]+\n|
-        \d+\s*(?:ч|нед|h|w|час|hour|week)\.?|
-        (?:Нравится|likes?):\s*\d+|
-        \d+\s*отметк[аи]\s\"Нравится\"|
-        Ответить|
-        Показать\sперевод|
-        Скрыть\sвсе\sответы|
-        Смотреть\sвсе\sответы\s\(\d+\)
-    """, re.VERBOSE)
-    }
-
-    @classmethod
-    def extract_data(cls, comment_text: str) -> Optional[CommentData]:
-        """Extract comment data with improved error handling"""
-        try:
-            # Extract username
-            username_match = cls.PATTERNS['username'].search(comment_text)
-            if not username_match:
-                raise cls.ParseError("Could not extract username")
-
-            username = next(
-                name for name in username_match.groups()
-                if name is not None
-            ).strip()
-
-            # Clean comment text
-            comment = cls.PATTERNS['metadata'].sub('', comment_text)
-            comment = TextAnalyzer.clean_text(comment)
-
-            # Extract time
-            time_match = cls.PATTERNS['time'].search(comment_text)
-            if not time_match:
-                weeks = 0
-            else:
-                value = int(time_match.group('value'))
-                unit = time_match.group('unit')
-                weeks = value if unit in {'нед', 'w', 'week'} else value / (24 * 7)
-
-            # Extract likes
-            likes_match = cls.PATTERNS['likes'].search(comment_text)
-            likes = next(
-                (int(count) for count in likes_match.groups() if count),
-                0
-            ) if likes_match else 0
-
-            # Analyze sentiment
-            sentiment = SentimentAnalyzer.analyze(comment)
-
-            return CommentData(
-                username=username,
-                text=comment,
-                likes=likes,
-                weeks_ago=weeks,
-                sentiment=sentiment
-            )
-
-        except cls.ParseError as e:
-            logger.warning(f"Failed to parse comment: {e}")
-            return None
-        except Exception as e:
-            logger.error(f"Unexpected error parsing comment: {e}", exc_info=True)
-            return None
 
-[... 3 lines not recovered: the header of the StatsCalculator class referenced below ...]
-    @staticmethod
-    def calculate_period_stats(comments: List[CommentData]) -> Dict:
-        """Calculate statistics using quantile-based periods"""
-        if not comments:
-            return {}
-
-        # Sort by weeks
-        sorted_comments = sorted(comments, key=lambda x: x.weeks_ago)
-
-        # Calculate period boundaries using quantiles
-        weeks = [c.weeks_ago for c in sorted_comments]
-        boundaries = np.quantile(weeks, [0.33, 0.67])
-
-        # Group comments by period
-        periods = {
-            'early': [],
-            'middle': [],
-            'late': []
-        }
-
-        for comment in sorted_comments:
-            if comment.weeks_ago <= boundaries[0]:
-                periods['early'].append(comment)
-            elif comment.weeks_ago <= boundaries[1]:
-                periods['middle'].append(comment)
-            else:
-                periods['late'].append(comment)
-
-        # Calculate statistics for each period
-        return {
-            period: {
-                'comments': len(comments),
-                'avg_likes': statistics.mean(c.likes for c in comments) if comments else 0,
-                'sentiment_ratio': sum(
-                    1 for c in comments
-                    if c.sentiment in {Sentiment.POSITIVE, Sentiment.SLIGHTLY_POSITIVE}
-                ) / len(comments) if comments else 0
-            }
-            for period, comments in periods.items()
-        }
 
-def analyze_post(
-[... 3 lines not recovered: the body below references content_type, link_to_post and post_likes parameters ...]
-    post_date: str,
-    description: str,
-    comment_count: int,
-    all_comments: str
-) -> Tuple[str, str, str, str, str]:
-    """Enhanced post analysis with improved error handling and reporting"""
     try:
-        #
-[... 2 lines not recovered: a comment_pattern = re.compile(...) definition, of which only the tail survives ...]
-            re.MULTILINE
-        )
-        comments_blocks = [
-            block.strip() for block in comment_pattern.split(all_comments)
-            if block and block.strip() and 'Скрыто алгоритмами Instagram' not in block
-        ]
 
-        # Extract and validate comment data
         comments_data = []
-        for block in comments_blocks:
-            if data := CommentExtractor.extract_data(block):
-                comments_data.append(data)
-
-        if not comments_data:
-            logger.warning("No valid comments found in the input")
-            return "No valid comments found", "", "", "", "0"
-
-        # Calculate statistics
-        basic_stats = {
-            'total_comments': len(comments_data),
-            'avg_length': statistics.mean(len(c.text) for c in comments_data),
-            'median_length': statistics.median(len(c.text) for c in comments_data),
-            'avg_words': statistics.mean(len(TextAnalyzer.get_words(c.text)) for c in comments_data),
-            'total_likes': sum(c.likes for c in comments_data),
-            'avg_likes': statistics.mean(c.likes for c in comments_data)
-        }
-
-        # Generate reports
-        reports = generate_reports(
-            content_type=content_type,
-            link_to_post=link_to_post,
-            post_likes=post_likes,
-            comments_data=comments_data,
-            basic_stats=basic_stats
-        )
-
-        return (
-            reports['analytics'],
-            "\n".join(c.username for c in comments_data),
-            "\n".join(c.text for c in comments_data),
-            "\n".join(str(c.likes) for c in comments_data),
-            str(basic_stats['total_likes'])
-        )
 
-[... 3 lines not recovered: the except handler that closes this version of analyze_post ...]
 
-[... 5 lines not recovered: the start of the generate_reports(...) signature; the body references content_type, link_to_post, post_likes and comments_data, and the signature ends with ...]
-    basic_stats: Dict
-) -> Dict[str, str]:
-    """Generate comprehensive reports in multiple formats"""
-
-    # Calculate additional statistics
-    sentiment_dist = Counter(c.sentiment for c in comments_data)
-    period_stats = StatsCalculator.calculate_period_stats(comments_data)
-    top_users = Counter(c.username for c in comments_data).most_common(5)
-    top_mentioned = Counter(
-        mention for c in comments_data
-        for mention in TextAnalyzer.extract_mentions(c.text)
-    ).most_common(5)
-
-    # Generate CSV report
-    csv_output = StringIO()
-    writer = csv.writer(csv_output)
-
-    # Write metadata
-    writer.writerow(['Content Analysis Report'])
-    writer.writerow(['Generated', datetime.now().isoformat()])
-    writer.writerow(['Content Type', content_type])
-    writer.writerow(['Post URL', link_to_post])
-    writer.writerow(['Post Likes', post_likes])
-    writer.writerow([])
-
-    # Write statistics sections
-    for section, data in {
-        'Basic Statistics': basic_stats,
-        'Sentiment Distribution': sentiment_dist,
-        'Period Analysis': period_stats,
-        'Top Users': dict(top_users),
-        'Top Mentioned': dict(top_mentioned)
-    }.items():
-        writer.writerow([section])
-        for key, value in data.items():
-            writer.writerow([key, value])
-        writer.writerow([])
-
-    # Generate text report
-    text_report = (
-        f"ANALYSIS REPORT\n"
-        f"Generated: {datetime.now():%Y-%m-%d %H:%M:%S}\n\n"
-        f"BASIC STATISTICS:\n"
-        f"- Total Comments: {basic_stats['total_comments']}\n"
-        f"- Average Likes: {basic_stats['avg_likes']:.1f}\n"
-        f"- Average Length: {basic_stats['avg_length']:.1f} characters\n"
-        f"- Median Length: {basic_stats['median_length']}\n"
-        f"- Average Words: {basic_stats['avg_words']:.1f}\n\n"
-        f"SENTIMENT ANALYSIS:\n"
-        f"- Positive: {sentiment_dist[Sentiment.POSITIVE]}\n"
-        f"- Slightly Positive: {sentiment_dist[Sentiment.SLIGHTLY_POSITIVE]}\n"
-        f"- Neutral: {sentiment_dist[Sentiment.NEUTRAL]}\n"
-        f"- Slightly Negative: {sentiment_dist[Sentiment.SLIGHTLY_NEGATIVE]}\n"
-        f"- Negative: {sentiment_dist[Sentiment.NEGATIVE]}\n\n"
-        f"TOP CONTRIBUTORS:\n" +
-        "\n".join(f"- {user}: {count} comments" for user, count in top_users) +
-        f"\n\nMOST MENTIONED:\n""\n".join(f"- {user}: {count} mentions" for user, count in top_mentioned) +
-        f"\n\nENGAGEMENT PERIODS:\n"
-        f"Early Period:\n"
-        f"- Comments: {period_stats['early']['comments']}\n"
-        f"- Avg Likes: {period_stats['early']['avg_likes']:.1f}\n"
-        f"- Positive Sentiment: {period_stats['early']['sentiment_ratio']*100:.1f}%\n\n"
-        f"Middle Period:\n"
-        f"- Comments: {period_stats['middle']['comments']}\n"
-        f"- Avg Likes: {period_stats['middle']['avg_likes']:.1f}\n"
-        f"- Positive Sentiment: {period_stats['middle']['sentiment_ratio']*100:.1f}%\n\n"
-        f"Late Period:\n"
-        f"- Comments: {period_stats['late']['comments']}\n"
-        f"- Avg Likes: {period_stats['late']['avg_likes']:.1f}\n"
-        f"- Positive Sentiment: {period_stats['late']['sentiment_ratio']*100:.1f}%\n"
-    )
-
-    return {
-        'csv': csv_output.getvalue(),
-        'analytics': text_report
-    }
 
-
-
 
-[... 1 line not recovered: the start of the validate_input(...) signature, which continues ...]
-        description: str, comment_count: int, comments: str) -> Tuple[bool, str]:
-    """Validate input parameters before processing"""
-    if not link:
-        return False, "Post link is required"
-    if likes < 0:
-        return False, "Likes count cannot be negative"
-    if comment_count < 0:
-        return False, "Comment count cannot be negative"
-    if not comments.strip():
-        return False, "Comments text is required"
-    return True, ""
 
-def wrapped_analyze_post(*args):
-    """Wrapper for analyze_post with input validation"""
-    is_valid, error_message = validate_input(*args)
-    if not is_valid:
-        return error_message, "", "", "", "0"
-
-    try:
-        return analyze_post(*args)
     except Exception as e:
-        logger.error(f"
-        return
 
-#
 iface = gr.Interface(
-    fn=
     inputs=[
-        gr.Radio(
-[... 3 lines not recovered: the Radio choices, label and default value ...]
-        ),
-        gr.
-[... 1 line not recovered: likely the label of this Textbox ...]
-            placeholder="https://instagram.com/p/..."
-        ),
-        gr.Number(
-            label="Post Likes",
-            value=0,
-            minimum=0
-        ),
-        gr.Textbox(
-            label="Post Date",
-            placeholder="YYYY-MM-DD"
-        ),
-        gr.Textbox(
-            label="Post Description",
-            lines=3,
-            placeholder="Enter post description..."
-        ),
-        gr.Number(
-            label="Total Comment Count",
-            value=0,
-            minimum=0
-        ),
-        gr.Textbox(
-            label="Comments",
-            lines=10,
-            placeholder="Paste comments here..."
-        )
     ],
     outputs=[
-        gr.Textbox(
-[... 2 lines not recovered: the arguments of the analytics output Textbox ...]
-        ),
-        gr.Textbox(
-            label="Extracted Usernames"
-        ),
-        gr.Textbox(
-            label="Cleaned Comments"
-        ),
-        gr.Textbox(
-            label="Comment Likes Timeline"
-        ),
-        gr.Textbox(
-            label="Total Comment Likes"
-        )
     ],
-    title="
-    description="""
-    Analyze Instagram comments with advanced metrics including:
-    - Sentiment analysis with granular classification
-    - Temporal engagement patterns
-    - User interaction statistics
-    - Content quality metrics
-    """,
-    article="""
-    ### Usage Instructions
-    1. Select the content type (Photo, Video, Reel, or Story)
-    2. Paste the post URL
-    3. Enter the post metadata (likes, date, description)
-    4. Paste the comments text
-    5. Click submit to generate analysis
-
-    ### Analysis Features
-    - Multi-level sentiment analysis
-    - Engagement period breakdown
-    - Top contributors and mentions
-    - Detailed statistical metrics
-
-    ### Notes
-    - All text fields support Unicode characters including emojis
-    - Time references are converted to a standardized format
-    - Analysis includes both quantitative and qualitative metrics
-    """
 )
 
 if __name__ == "__main__":
-
-    logger.info("Starting Instagram Comment Analyzer")
-
-    try:
-        # Launch the interface with enhanced settings
-        iface.launch(
-            server_name="0.0.0.0",  # Allow external access
-            server_port=7860,       # Default Gradio port
-            share=False,            # Disable public URL generation
-            debug=False,            # Disable debug mode in production
-            enable_queue=True,      # Enable request queuing
-            max_threads=4           # Limit concurrent processing
-        )
-    except Exception as e:
-        logger.error(f"Failed to start application: {e}", exc_info=True)
-        raise
+import gradio as gr
 import re
 import logging
+from typing import Tuple, Optional
 
+logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
+    """Extracts the data from a comment block"""
+    # The string literals below match the Russian Instagram UI:
+    # "Фото профиля" = profile photo, "Нравится" = like, "Ответить" = reply,
+    # "ч."/"нед." = hours/weeks ago.
+    try:
+        # Skip the post info block
+        if 'отметок "Нравится"' in comment_text:
+            return None, None, 0, 0
+
+        # Extract the username
+        username_match = re.search(r"Фото профиля ([^\n]+)", comment_text)
+        if not username_match:
+            return None, None, 0, 0
+
+        username = username_match.group(1).strip()
+
+        # Extract the comment text
+        lines = comment_text.split('\n')
+        comment = ""
+        for i, line in enumerate(lines):
+            if username in line and i + 1 < len(lines):
+                comment = lines[i + 1].strip()
+                # Clean the comment
+                comment = re.sub(r'\d+\s*(?:ч\.|нед\.)\s*$', '', comment)
+                comment = re.sub(r'"Нравится":\s*\d+\s*Ответить\s*$', '', comment)
+                break
+
+        # Extract the likes
+        likes_match = re.search(r'"Нравится":\s*(\d+)', comment_text)
+        likes = int(likes_match.group(1)) if likes_match else 0
+
+        # Extract the time
+        time_match = re.search(r'(\d+)\s*(?:ч\.|нед\.)', comment_text)
+        time = int(time_match.group(1)) if time_match else 0
+
+        return username, comment.strip(), likes, time
 
+    except Exception as e:
+        logger.error(f"Error extracting data: {e}")
+        return None, None, 0, 0
 
+def analyze_post(content_type: str, link: str, post_likes: int,
+                 post_date: str, description: str, comment_count: int,
+                 all_comments: str) -> Tuple[str, str, str, str, str]:
+    """Analyzes the post and its comments"""
     try:
+        # Split into comment blocks
+        blocks = re.split(r'(?=Фото профиля)', all_comments)
+        blocks = [b.strip() for b in blocks if b.strip()]
 
         comments_data = []
 
+        # Process each block
+        for block in blocks:
+            username, comment, likes, time = extract_comment_data(block)
+            if username and comment:
+                comments_data.append({
+                    'username': username,
+                    'comment': comment,
+                    'likes': likes,
+                    'time': time
+                })
 
+        # Build the output data
+        usernames = "\n".join(item['username'] for item in comments_data)
+        comments = "\n".join(item['comment'] for item in comments_data)
+        likes = "\n".join(str(item['likes']) for item in comments_data)
+        total_likes = sum(item['likes'] for item in comments_data)
 
+        analytics = f"""
+📊 Анализ комментариев:
+Всего комментариев: {len(comments_data)}
+Уникальных пользователей: {len(set(item['username'] for item in comments_data))}
+Общее количество лайков: {total_likes}
+"""
 
+        return analytics, usernames, comments, likes, str(total_likes)
 
     except Exception as e:
+        logger.error(f"Analysis error: {e}")
+        return str(e), "", "", "", "0"
 
+# Gradio interface
 iface = gr.Interface(
+    fn=analyze_post,
     inputs=[
+        gr.Radio(choices=["Photo", "Video"], label="Content Type", value="Photo"),
+        gr.Textbox(label="Link to Post"),
+        gr.Number(label="Likes", value=0),
+        gr.Textbox(label="Post Date"),
+        gr.Textbox(label="Description", lines=3),
+        gr.Number(label="Comment Count", value=0),
+        gr.Textbox(label="Comments", lines=10)
     ],
     outputs=[
+        gr.Textbox(label="Analytics Summary", lines=10),
+        gr.Textbox(label="Usernames"),
+        gr.Textbox(label="Comments"),
+        gr.Textbox(label="Likes Chronology"),
+        gr.Textbox(label="Total Likes on Comments")
     ],
+    title="Instagram Comment Analyzer",
+    description="Анализатор комментариев Instagram"
 )
 
 if __name__ == "__main__":
+    iface.launch()
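
For a quick sanity check of the new parser, here is a minimal usage sketch. The sample block is hypothetical; it only assumes the layout the regexes above target (a "Фото профиля <user>" header line, the comment text on the next line, and trailing age/like/reply metadata), and the real export layout may differ:

from app import extract_comment_data

# Hypothetical sample block in the layout the regexes expect
sample = (
    'Фото профиля anna_k\n'
    'Очень красиво! 🔥\n'
    '2 нед. "Нравится": 14 Ответить'
)

username, comment, likes, age = extract_comment_data(sample)
print(username)  # anna_k
print(comment)   # Очень красиво! 🔥
print(likes)     # 14
print(age)       # 2 (the number before "нед.")

Note that the function takes the comment text from the line immediately after the first line mentioning the user, so this sketch places the comment right below the "Фото профиля" header.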