Update app.py
Browse files
app.py
CHANGED
@@ -197,6 +197,81 @@ if __name__ == "__main__":
|
|
197 |
)
|
198 |
except Exception as e:
|
199 |
logger.error(f"Error launching interface: {e}", exc_info=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
-
|
|
|
197 |
)
|
198 |
except Exception as e:
|
199 |
logger.error(f"Error launching interface: {e}", exc_info=True)
|
200 |
+
import re
|
201 |
+
import emoji
|
202 |
+
import gradio as gr
|
203 |
+
from collections import defaultdict, Counter
|
204 |
+
|
205 |
+
def extract_comment_data(comment_text: str) -> "dict | None":
    """Parse one pasted comment block into a dictionary.

    The block must contain the ``Фото профиля`` marker followed by the
    username and a newline. The first line holding a relative timestamp
    (``<number> нед.`` for weeks or ``<number> ч.`` for hours) separates the
    header from the comment body: everything after that line is the comment.
    When no timestamp line exists, the whole block is kept as the comment.

    Args:
        comment_text: Raw text of a single comment, including the marker.

    Returns:
        ``None`` when no username can be extracted; otherwise a dict with:
        ``username`` (str), ``comment`` (str, stripped body),
        ``likes`` (int, 0 when no ``"Нравится": <n>`` counter is present in
        the body) and ``time`` (int from the timestamp line, or ``None``).
        The unit of ``time`` (weeks vs. hours) is not recorded.
    """
    username_match = re.search(r"Фото профиля\s*(.+?)\n", comment_text)
    username = username_match.group(1).strip() if username_match else None
    if not username:
        return None  # Skip blocks without a usable username.

    lines = comment_text.splitlines()
    body_lines = lines
    time_value = None
    for index, line in enumerate(lines):
        # Matches weeks ("нед.") or hours ("ч."). The number is captured
        # here, because the timestamp line itself is not part of the body
        # and would be lost if we searched the body afterwards.
        time_match = re.search(r"(\d+)\s*(?:нед\.|ч\.)", line)
        if time_match:
            time_value = int(time_match.group(1))
            body_lines = lines[index + 1:]
            break
    body = "\n".join(body_lines).strip()

    likes_match = re.search(r'"Нравится":\s*(\d+)', body)

    return {
        "username": username,
        "comment": body,
        "likes": int(likes_match.group(1)) if likes_match else 0,
        "time": time_value,
    }
|
236 |
+
|
237 |
+
def analyze_comments(comments_text: str) -> tuple:
    """Split pasted comments into blocks, parse them and aggregate statistics.

    Args:
        comments_text: Raw pasted text containing zero or more comments, each
            introduced by the ``Фото профиля`` marker (matched
            case-insensitively).

    Returns:
        A 2-tuple ``(analytics, comments)``. ``analytics`` is a
        ``defaultdict(int)`` with the keys ``total_comments``,
        ``total_likes``, ``emojis``, ``unique_users``, ``avg_likes`` and
        ``top_commenters`` (dict of the five most frequent usernames).
        ``comments`` is the list of dicts produced by
        ``extract_comment_data`` for every block that yielded a username.
    """
    marker = "Фото профиля"

    # Split WITHOUT a capturing group so the result holds only the text
    # between markers. Element 0 is whatever precedes the first marker and
    # is not a comment, so it is dropped.
    bodies = re.split(r"Фото профиля", comments_text, flags=re.IGNORECASE)[1:]

    comments = []
    for body in bodies:
        # The parser looks for the marker itself (case-sensitively), so
        # re-attach it in canonical form in front of each body.
        comment_data = extract_comment_data(marker + body)
        if comment_data:
            comments.append(comment_data)

    # Aggregate data. Counters are initialised explicitly so the summary has
    # the same keys even when no comment was parsed.
    analytics = defaultdict(int)
    analytics["total_comments"] = 0
    analytics["total_likes"] = 0
    analytics["emojis"] = 0
    unique_users = set()
    top_commenters = Counter()

    for comment in comments:
        analytics["total_comments"] += 1
        unique_users.add(comment["username"])
        analytics["total_likes"] += comment["likes"]
        top_commenters[comment["username"]] += 1
        # Count actual emoji characters, not the length of the demojized text.
        analytics["emojis"] += emoji.emoji_count(comment["comment"])

    analytics["unique_users"] = len(unique_users)
    analytics["avg_likes"] = (
        analytics["total_likes"] / analytics["total_comments"]
        if analytics["total_comments"] > 0
        else 0
    )
    analytics["top_commenters"] = dict(top_commenters.most_common(5))

    return analytics, comments
|
264 |
+
|
265 |
+
|
266 |
+
# Gradio front end: one multi-line text input that feeds analyze_comments,
# whose two return values go to a text summary and a JSON viewer.
_comments_input = gr.Textbox(label="Instagram Comments (Paste here)", lines=10)
_result_outputs = [
    gr.Textbox(label="Analytics Summary"),
    gr.JSON(label="Individual Comment Data"),
]

iface = gr.Interface(
    fn=analyze_comments,
    inputs=_comments_input,
    outputs=_result_outputs,
    title="Enhanced Instagram Comment Analyzer",
    description="Improved analyzer for Instagram comments.",
)

# share=True asks Gradio to create a public share link.
iface.launch(share=True)
|