boompack committed on
Commit
8c00b86
·
verified ·
1 Parent(s): bf4724c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -154
app.py CHANGED
@@ -1,114 +1,34 @@
1
  import gradio as gr
2
- import re
3
  import emoji
4
- import logging
5
- from typing import Tuple, Optional
6
- from functools import lru_cache
7
  from collections import Counter
8
 
9
- logging.basicConfig(level=logging.INFO)
10
- logger = logging.getLogger(__name__)
11
-
12
- def count_emojis(text: str) -> int:
13
- """Подсчет количества эмодзи в тексте"""
14
- return len([c for c in text if c in emoji.EMOJI_DATA])
15
-
16
- def extract_mentions(text: str) -> list:
17
- """Извлечение упоминаний пользователей"""
18
- return re.findall(r'@(\w+)', text)
19
-
20
- def is_spam(text: str) -> bool:
21
- """Определение спам-комментариев"""
22
- spam_indicators = ['🔥' * 3, '❤️' * 3, 'follow me', 'check my']
23
- return any(indicator in text.lower() for indicator in spam_indicators)
24
-
25
- def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
26
- """Извлекает данные из комментария"""
27
- try:
28
- # Extract username
29
- username_match = re.search(r'Фото профиля\s+(.+?)\n', comment_text)
30
- username = username_match.group(1).strip() if username_match else None
31
-
32
- if not username:
33
- return None, None, 0, 0
34
-
35
- # Extract comment text
36
- comment_lines = comment_text.split('\n')
37
- comment = ""
38
- time_pattern = r'\d+\s*(?:ч\.|нед\.)'
39
-
40
- # Identify where the comment text starts
41
- for i, line in enumerate(comment_lines):
42
- if re.search(time_pattern, line):
43
- if i + 1 < len(comment_lines):
44
- comment = comment_lines[i + 1].strip()
45
- break
46
-
47
- # Clean up comment text
48
- comment = re.sub(r'\d+\s*(?:ч\.|нед\.)\s*$', '', comment)
49
- comment = re.sub(r'"Нравится":\s*\d+\s*Ответить\s*$', '', comment)
50
-
51
- # Extract likes
52
- likes_match = re.search(r'"Нравится":\s*(\d+)', comment_text)
53
- likes = int(likes_match.group(1)) if likes_match else 0
54
-
55
- # Extract time
56
- time_match = re.search(r'(\d+)\s*(?:ч\.|нед\.)', comment_text)
57
- time = int(time_match.group(1)) if time_match else 0
58
-
59
- return username, comment.strip(), likes, time
60
-
61
- except Exception as e:
62
- logger.error(f"Error extracting data: {e}")
63
- return None, None, 0, 0
64
-
65
- @lru_cache(maxsize=100)
66
  def analyze_post(content_type: str, link: str, post_likes: int,
67
  post_date: str, description: str, comment_count: int,
68
  all_comments: str) -> Tuple[str, str, str, str, str]:
69
- """Анализирует пост и комментарии"""
70
  try:
71
- if not all_comments or 'Фото профиля' not in all_comments:
72
- return "Ошибка: неверный формат данных", "", "", "", "0"
73
 
74
- blocks = re.split(r'(?=Фото профиля)', all_comments)
75
- blocks = [b.strip() for b in blocks if b.strip()]
76
-
77
- comments_data = []
78
  total_emojis = 0
79
  mentions = []
80
  spam_count = 0
81
-
82
- for block in blocks:
83
- username, comment, likes, time = extract_comment_data(block)
84
- if username and comment:
85
- emoji_count = count_emojis(comment)
86
- comment_mentions = extract_mentions(comment)
87
- is_spam_comment = is_spam(comment)
88
-
89
- comments_data.append({
90
- 'username': username,
91
- 'comment': comment,
92
- 'likes': likes,
93
- 'time': time,
94
- 'emoji_count': emoji_count,
95
- 'mentions': comment_mentions,
96
- 'is_spam': is_spam_comment
97
- })
98
-
99
- total_emojis += emoji_count
100
- mentions.extend(comment_mentions)
101
- if is_spam_comment:
102
- spam_count += 1
103
 
104
- # Подсчет статистики
105
  total_comments = len(comments_data)
106
- unique_users = len(set(item['username'] for item in comments_data))
107
- total_likes = sum(item['likes'] for item in comments_data)
108
  avg_likes = total_likes / total_comments if total_comments > 0 else 0
109
 
110
- # Топ комментаторы
111
- commenter_counts = Counter(item['username'] for item in comments_data)
112
  top_commenters = commenter_counts.most_common(5)
113
 
114
  analytics = f"""
@@ -126,74 +46,40 @@ def analyze_post(content_type: str, link: str, post_likes: int,
126
  {chr(10).join(f'• {user}: {count} комментария' for user, count in top_commenters if count > 1)}
127
  """
128
 
129
- return (
130
- analytics,
131
- "\n".join(item['username'] for item in comments_data),
132
- "\n".join(item['comment'] for item in comments_data),
133
- "\n".join(str(item['likes']) for item in comments_data),
134
- str(total_likes)
135
- )
136
 
 
 
 
 
 
137
  except Exception as e:
138
- logger.error(f"Analysis error: {e}")
139
- return str(e), "", "", "", "0"
140
 
141
- # Создаем интерфейс Gradio
142
  iface = gr.Interface(
143
  fn=analyze_post,
144
  inputs=[
145
- gr.Radio(
146
- choices=["Photo", "Video"],
147
- label="Content Type",
148
- value="Photo"
149
- ),
150
- gr.Textbox(
151
- label="Link to Post",
152
- placeholder="Вставьте ссылку на пост"
153
- ),
154
- gr.Number(
155
- label="Likes",
156
- value=0,
157
- minimum=0
158
- ),
159
- gr.Textbox(
160
- label="Post Date",
161
- placeholder="YYYY-MM-DD"
162
- ),
163
- gr.Textbox(
164
- label="Description",
165
- lines=3,
166
- placeholder="Описание поста"
167
- ),
168
- gr.Number(
169
- label="Comment Count",
170
- value=0,
171
- minimum=0
172
- ),
173
- gr.Textbox(
174
- label="Comments",
175
- lines=10,
176
- placeholder="Вставьте комментарии"
177
- )
178
  ],
179
  outputs=[
180
- gr.Textbox(label="Analytics Summary", lines=15),
181
- gr.Textbox(label="Usernames"),
182
- gr.Textbox(label="Comments"),
183
- gr.Textbox(label="Likes Chronology"),
184
- gr.Textbox(label="Total Likes on Comments")
185
  ],
186
- title="Enhanced Instagram Comment Analyzer",
187
- description="Анализатор комментариев Instagram с расширенной аналитикой",
188
  theme="default"
189
  )
190
 
191
- if __name__ == "__main__":
192
- try:
193
- iface.launch(
194
- share=True, # Создает публичную ссылку
195
- debug=True, # Включает режим отладки
196
- show_error=True # Показывает подробности ошибок
197
- )
198
- except Exception as e:
199
- logger.error(f"Error launching interface: {e}", exc_info=True)
 
1
  import gradio as gr
2
+ import json
3
  import emoji
4
+ import re
 
 
5
  from collections import Counter
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  def analyze_post(content_type: str, link: str, post_likes: int,
8
  post_date: str, description: str, comment_count: int,
9
  all_comments: str) -> Tuple[str, str, str, str, str]:
 
10
  try:
11
+ comments_data = json.loads(all_comments)
 
12
 
 
 
 
 
13
  total_emojis = 0
14
  mentions = []
15
  spam_count = 0
16
+ total_likes = 0
17
+
18
+ for comment in comments_data:
19
+ username = comment['username']
20
+ comment_text = comment['comment']
21
+ likes = comment.get('likes', 0) #Обработка отсутствия ключа likes
22
+ total_likes += likes
23
+
24
+ total_emojis += len([c for c in comment_text if c in emoji.EMOJI_DATA])
25
+ mentions.extend(re.findall(r'@(\w+)', comment_text))
 
 
 
 
 
 
 
 
 
 
 
 
26
 
 
27
  total_comments = len(comments_data)
28
+ unique_users = len(set(comment['username'] for comment in comments_data))
 
29
  avg_likes = total_likes / total_comments if total_comments > 0 else 0
30
 
31
+ commenter_counts = Counter(comment['username'] for comment in comments_data)
 
32
  top_commenters = commenter_counts.most_common(5)
33
 
34
  analytics = f"""
 
46
  {chr(10).join(f'• {user}: {count} комментария' for user, count in top_commenters if count > 1)}
47
  """
48
 
49
+ usernames = "\n".join(comment['username'] for comment in comments_data)
50
+ comments = "\n".join(comment['comment'] for comment in comments_data)
51
+ likes_chronology = "\n".join(str(comment.get('likes', 0)) for comment in comments_data)
 
 
 
 
52
 
53
+
54
+ return (analytics, usernames, comments, likes_chronology, str(total_likes))
55
+
56
+ except json.JSONDecodeError:
57
+ return "Ошибка: Неверный JSON-ввод", "", "", "", "0"
58
  except Exception as e:
59
+ return f"Произошла ошибка: {e}", "", "", "", "0"
 
60
 
 
61
  iface = gr.Interface(
62
  fn=analyze_post,
63
  inputs=[
64
+ gr.Radio(choices=["Фото", "Видео"], label="Тип контента", value="Фото"),
65
+ gr.Textbox(label="Ссылка на пост", placeholder="Вставьте ссылку на пост"),
66
+ gr.Number(label="Лайки", value=0, minimum=0),
67
+ gr.Textbox(label="Дата поста", placeholder="YYYY-MM-DD"),
68
+ gr.Textbox(label="Описание", lines=3, placeholder="Описание поста"),
69
+ gr.Number(label="Количество комментариев", value=0, minimum=0),
70
+ gr.Textbox(label="Комментарии (JSON)", lines=10,
71
+ placeholder='Вставьте данные в формате JSON (см. пример)')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  ],
73
  outputs=[
74
+ gr.Textbox(label="Анализ", lines=15),
75
+ gr.Textbox(label="Пользователи"),
76
+ gr.Textbox(label="Комментарии"),
77
+ gr.Textbox(label="Хронология лайков"),
78
+ gr.Textbox(label="Всего лайков")
79
  ],
80
+ title="Расширенный анализатор комментариев Instagram",
81
+ description="Анализатор комментариев Instagram с расширенной аналитикой. Введите комментарии в формате JSON.",
82
  theme="default"
83
  )
84
 
85
+ iface.launch(share=True, debug=True, show_error=True)