Spaces:

KIMOSSINO
/

hashtags

Sleeping

App Files Files Community

KIMOSSINO committed on Dec 7, 2024

Commit

6fb8772

verified ·

1 Parent(s): 54fa405

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -12

app.py CHANGED Viewed

@@ -1,11 +1,14 @@
 from bs4 import BeautifulSoup
 import pandas as pd
 from collections import Counter
-def extract_data_from_html(file_path):
-    # قراءة محتوى الملف
-    with open(file_path, 'r', encoding='utf-8') as file:
-        content = file.read()
     # تحليل HTML باستخدام BeautifulSoup
     soup = BeautifulSoup(content, 'html.parser')
@@ -38,13 +41,35 @@ def extract_data_from_html(file_path):
     return df_titles, df_hashtags
-# استخدام الدالة لتحليل الملف
-file_path = "/mnt/data/Spanish.txt"
-titles_df, hashtags_df = extract_data_from_html(file_path)
-# عرض النتائج
-print("العناوين والهاشتاغات المستخرجة:")
-print(titles_df)
-print("\nالهاشتاغات مع عدد مرات تكرارها:")
-print(hashtags_df)

 from bs4 import BeautifulSoup
 import pandas as pd
 from collections import Counter
+import gradio as gr
+def extract_data_from_html(file):
+    try:
+        # قراءة محتوى الملف المرفوع
+        content = file.read().decode('utf-8')
+    except Exception as e:
+        return f"خطأ أثناء قراءة الملف: {str(e)}", None
     # تحليل HTML باستخدام BeautifulSoup
     soup = BeautifulSoup(content, 'html.parser')
     return df_titles, df_hashtags
+# Gradio callback
+def gradio_interface(file):
+    """Run the extraction on the uploaded file and render both results as HTML.
+
+    Args:
+        file: The value Gradio passes for the upload; it is forwarded
+            unchanged to ``extract_data_from_html``.
+
+    Returns:
+        A pair of strings, one per HTML output panel. On success these are
+        the titles table and the hashtag-count table (or a placeholder
+        message for an empty table). On failure the first string is the
+        error message and the second is empty.
+    """
+    result = extract_data_from_html(file)
+    # A non-tuple result is passed through unchanged as the first output.
+    if not isinstance(result, tuple):
+        return result, ""
+    titles_df, hashtags_df = result
+    # A read failure is reported by extract_data_from_html as (message, None),
+    # which is still a tuple. Show that message rather than hiding it behind
+    # the generic "no data" text below.
+    if isinstance(titles_df, str):
+        return titles_df, ""
+    if titles_df is None or hashtags_df is None:
+        return "لم يتم استخراج أي بيانات.", ""
+    # Convert the results to HTML for display
+    titles_html = titles_df.to_html(index=False) if not titles_df.empty else "لا توجد عناوين مستخرجة."
+    hashtags_html = hashtags_df.to_html(index=False) if not hashtags_df.empty else "لا توجد هاشتاغات مستخرجة."
+    return titles_html, hashtags_html
+# Build the Gradio UI: one file upload as input, two HTML panels as output
+_upload_input = gr.File(label="ارفع ملف HTML")
+_result_panels = [
+    gr.HTML(label="العناوين والهاشتاغات المستخرجة"),
+    gr.HTML(label="الهاشتاغات مع عدد مرات تكرارها"),
+]
+interface = gr.Interface(
+    title="استخراج العناوين والهاشتاغات",
+    description="ارفع ملف HTML لاستخراج العناوين والهاشتاغات مع عدد مرات تكرار كل هاشتاغ.",
+    fn=gradio_interface,
+    inputs=_upload_input,
+    outputs=_result_panels,
+)
+# Start the app
+interface.launch()