Spaces:

KIMOSSINO
/

hashtagtik

Sleeping

App Files Files Community

KIMOSSINO committed on Dec 10, 2024

Commit

4e6c980

verified ·

1 Parent(s): 6e18263

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -71

app.py CHANGED Viewed

@@ -2,8 +2,7 @@ import gradio as gr
 from collections import Counter
 from bs4 import BeautifulSoup
-def extract_titles_and_hashtags_and_views(file):
     try:
         # قراءة محتوى الملف
         if hasattr(file, 'read'):
@@ -12,104 +11,83 @@ def extract_titles_and_hashtags_and_views(file):
             with open(file.name, 'r', encoding='utf-8') as f:
                 content = f.read()
     except Exception as e:
-        return f"خطأ أثناء قراءة الملف: {str(e)}", "", "", ""
     # تحليل HTML باستخدام BeautifulSoup
     try:
         soup = BeautifulSoup(content, 'html.parser')
     except Exception as e:
-        return f"خطأ في تحليل محتوى HTML: {str(e)}", "", "", ""
-    # استخراج البيانات
-    data = []
-    hashtags_counter = Counter()
-    views_text = []
-    # البحث عن الحاويات
-    desc_containers = soup.find_all('div', class_="css-vi46v1-DivDesContainer")
-    if not desc_containers:
-        return "لم يتم العثور على أي بيانات مطابقة.", "", "", ""
-    # معالجة كل حاوية
-    for container in desc_containers:
-        # البحث عن العنوان
-        title = (
-            container.find('h2', class_='title')
-            or container.find('h1', class_='title')
-            or container.find('div', class_='title')
-            or container.find(class_='title')
-        )
-        title = title.get_text(strip=True) if title else container.get('aria-label', 'بدون عنوان').strip()
         # استخراج الهاشتاغات
-        hashtags = [tag.get_text(strip=True) for tag in container.find_all('a') if tag.get_text(strip=True).startswith('#')]
-        if hashtags:
-            hashtags_counter.update(hashtags)
-        # استخراج نسبة المشاهدة
-        view = container.find('strong', class_="video-count css-dirst9-StrongVideoCount e148ts222")
-        view = view.get_text(strip=True) if view else "غير متوفر"
-        # تخزين البيانات
-        data.append({
-            "Title": title,
-            "Hashtags": ", ".join(hashtags),
-            "Views": view
-        })
-        views_text.append(f"{title}: {view}")
-    # إعداد النصوص النهائية
-    titles_text = "\n".join(f"{i+1}. {row['Title']}" for i, row in enumerate(data) if row['Title'] != 'بدون عنوان')
-    hashtags_text = "\n".join(f"{hashtag}: {count}" for hashtag, count in sorted(hashtags_counter.items(), key=lambda x: (-x[1], x[0])))
-    views_summary_text = "\n".join(views_text)
-    return (
-        titles_text or "لا توجد عناوين مستخرجة.",
-        hashtags_text or "لا توجد هاشتاغات مستخرجة.",
-        views_summary_text or "لا توجد بيانات مشاهدة."
     )
 # إنشاء واجهة Gradio
 def gradio_interface():
     with gr.Blocks() as demo:
-        gr.Markdown("## 📝 محلل النصوص المتقدم")
         with gr.Row():
-            file_input = gr.File(label="📂 رفع ملف TXT", file_types=[".txt"])
         with gr.Row():
             analyze_btn = gr.Button("تحليل البيانات", variant="primary")
         with gr.Row():
-            titles_output = gr.Textbox(
-                label="📜 العناوين المستخرجة",
-                lines=10,
-                interactive=False,
-                placeholder="ستظهر العناوين هنا"
-            )
-            hashtags_output = gr.Textbox(
-                label="🏷️ الهاشتاغات المستخرجة (مع التكرار)",
-                lines=10,
                 interactive=False,
-                placeholder="ستظهر الهاشتاغات هنا"
-            )
-            views_output = gr.Textbox(
-                label="👀 نسبة المشاهدة",
-                lines=10,
-                interactive=False,
-                placeholder="ستظهر نسب المشاهدة هنا"
             )
         analyze_btn.click(
-            fn=extract_titles_and_hashtags_and_views,
             inputs=[file_input],
-            outputs=[titles_output, hashtags_output, views_output],
         )
     return demo
 # تشغيل التطبيق
 if __name__ == "__main__":
     demo = gradio_interface()

 from collections import Counter
 from bs4 import BeautifulSoup
+def analyze_videos(file):
     try:
         # قراءة محتوى الملف
         if hasattr(file, 'read'):
             with open(file.name, 'r', encoding='utf-8') as f:
                 content = f.read()
     except Exception as e:
+        return f"خطأ أثناء قراءة الملف: {str(e)}", ""
     # تحليل HTML باستخدام BeautifulSoup
     try:
         soup = BeautifulSoup(content, 'html.parser')
     except Exception as e:
+        return f"خطأ في تحليل محتوى HTML: {str(e)}", ""
+    # استخراج بيانات الفيديوهات
+    videos_data = []
+    video_elements = soup.find_all('a', class_="css-1wrhn5c-AMetaCaptionLine")
+    if not video_elements:
+        return "لم يتم العثور على أي بيانات مطابقة.", ""
+    for video in video_elements:
+        video_info = {}
+        # استخراج الرابط
+        video_info["Link"] = video.get('href', 'رابط غير متوفر')
+        # استخراج العنوان
+        title_element = video.find('span', class_="css-j2a19r-SpanText")
+        video_info["Title"] = title_element.get_text(strip=True) if title_element else "عنوان غير متوفر"
         # استخراج الهاشتاغات
+        hashtags = [
+            tag.get_text(strip=True)
+            for tag in video.find_all('strong', class_="css-1p6dp51-StrongText")
+        ]
+        video_info["Hashtags"] = ", ".join(hashtags)
+        # استخراج عدد المشاهدات
+        views_element = video.find_next('strong', class_="css-ws4x78-StrongVideoCount")
+        video_info["Views"] = views_element.get_text(strip=True) if views_element else "عدد المشاهدات غير متوفر"
+        videos_data.append(video_info)
+    # تجهيز النصوص النهائية
+    videos_summary = "\n\n".join(
+        f"📹 الفيديو {i+1}:\n"
+        f"- الرابط: {video['Link']}\n"
+        f"- العنوان: {video['Title']}\n"
+        f"- الهاشتاغات: {video['Hashtags']}\n"
+        f"- عدد المشاهدات: {video['Views']}"
+        for i, video in enumerate(videos_data)
     )
+    return videos_summary or "لم يتم استخراج أي معلومات."
 # إنشاء واجهة Gradio
 def gradio_interface():
     with gr.Blocks() as demo:
+        gr.Markdown("## 📝 محلل فيديوهات تيك توك")
         with gr.Row():
+            file_input = gr.File(label="📂 رفع ملف HTML", file_types=[".html"])
         with gr.Row():
             analyze_btn = gr.Button("تحليل البيانات", variant="primary")
         with gr.Row():
+            output_box = gr.Textbox(
+                label="📜 البيانات المستخرجة",
+                lines=20,
                 interactive=False,
+                placeholder="ستظهر البيانات هنا"
             )
         analyze_btn.click(
+            fn=analyze_videos,
             inputs=[file_input],
+            outputs=[output_box],
         )
     return demo
 # تشغيل التطبيق
 if __name__ == "__main__":
     demo = gradio_interface()