Spaces:

KIMOSSINO
/

hashtagtik

Sleeping

App Files Files Community

KIMOSSINO committed on Dec 10, 2024

Commit

4dbac94

verified ·

1 Parent(s): a568782

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -52

app.py CHANGED Viewed

@@ -1,29 +1,18 @@
 import gradio as gr
-from collections import Counter
 from bs4 import BeautifulSoup
 import requests
 import re
 from urllib.parse import urlparse
-import json
-from youtube_transcript_api import YouTubeTranscriptApi
 def extract_youtube_id(url):
     """استخراج معرف فيديو يوتيوب من الرابط"""
-    if match := re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', url):
-        return match.group(1)
-    return None
 def extract_tiktok_id(url):
     """استخراج معرف فيديو تيكتوك من الرابط"""
-    if match := re.search(r'video/(\d+)', url):
-        return match.group(1)
-    return None
-def extract_instagram_shortcode(url):
-    """استخراج معرف منشور انستغرام من الرابط"""
-    if match := re.search(r'/(p|reel)/([A-Za-z0-9_-]+)', url):
-        return match.group(2)
-    return None
 def get_hashtags_from_text(text):
     """استخراج الهاشتاغات من النص"""
@@ -32,55 +21,54 @@ def get_hashtags_from_text(text):
 def extract_from_url(url):
     """استخراج البيانات من الرابط"""
     domain = urlparse(url).netloc
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
     }
     try:
         if 'youtube.com' in domain or 'youtu.be' in domain:
             video_id = extract_youtube_id(url)
             if not video_id:
                 return "رابط يوتيوب غير صالح", "", ""
-            api_url = f"https://www.youtube.com/watch?v={video_id}"
-            response = requests.get(api_url, headers=headers)
-            if 'watch-title' in response.text:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                title = soup.find('meta', property='og:title')['content']
-                description = soup.find('meta', property='og:description')['content']
-                hashtags = get_hashtags_from_text(description)
-                return title, description, "\n".join(hashtags)
         elif 'tiktok.com' in domain:
             video_id = extract_tiktok_id(url)
             if not video_id:
                 return "رابط تيكتوك غير صالح", "", ""
-            response = requests.get(url, headers=headers)
-            if response.status_code == 200:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                title = soup.find('meta', property='og:title')['content']
-                description = soup.find('meta', property='og:description')['content']
-                hashtags = get_hashtags_from_text(description)
-                return title, description, "\n".join(hashtags)
-        elif 'instagram.com' in domain:
-            shortcode = extract_instagram_shortcode(url)
-            if not shortcode:
-                return "رابط انستغرام غير صالح", "", ""
             response = requests.get(url, headers=headers)
-            if response.status_code == 200:
-                soup = BeautifulSoup(response.text, 'html.parser')
-                title = soup.find('meta', property='og:title')['content']
-                description = soup.find('meta', property='og:description')['content']
-                hashtags = get_hashtags_from_text(description)
-                return title, description, "\n".join(hashtags)
     except Exception as e:
         return f"حدث خطأ: {str(e)}", "", ""
@@ -93,8 +81,8 @@ def gradio_interface():
         with gr.Row():
             url_input = gr.Textbox(
-                label="🔗 أدخل رابط يوتيوب/تيكتوك/انستغرام",
-                placeholder="https://..."
             )
         with gr.Row():
@@ -131,4 +119,4 @@ def gradio_interface():
 if __name__ == "__main__":
     demo = gradio_interface()
-    demo.launch()

 import gradio as gr
 from bs4 import BeautifulSoup
 import requests
 import re
 from urllib.parse import urlparse
 def extract_youtube_id(url):
     """استخراج معرف فيديو يوتيوب من الرابط"""
+    match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', url)
+    return match.group(1) if match else None
 def extract_tiktok_id(url):
     """استخراج معرف فيديو تيكتوك من الرابط"""
+    match = re.search(r'video/(\d+)', url)
+    return match.group(1) if match else None
 def get_hashtags_from_text(text):
     """استخراج الهاشتاغات من النص"""
 def extract_from_url(url):
     """استخراج البيانات من الرابط"""
     domain = urlparse(url).netloc
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
     }
     try:
+        # استخراج البيانات من YouTube
         if 'youtube.com' in domain or 'youtu.be' in domain:
             video_id = extract_youtube_id(url)
             if not video_id:
                 return "رابط يوتيوب غير صالح", "", ""
+            response = requests.get(url, headers=headers)
+            soup = BeautifulSoup(response.text, 'html.parser')
+            # استخراج العنوان
+            title_element = soup.find('h1', {'id': 'title'})
+            title = title_element.text.strip() if title_element else "العنوان غير متوفر"
+            # استخراج الوصف
+            description_element = soup.find('div', {'id': 'description'})
+            description = description_element.text.strip() if description_element else "الوصف غير متوفر"
+            # استخراج الهاشتاغات
+            hashtags = get_hashtags_from_text(description)
+            return title, description, "\n".join(hashtags)
+        # استخراج البيانات من TikTok
         elif 'tiktok.com' in domain:
             video_id = extract_tiktok_id(url)
             if not video_id:
                 return "رابط تيكتوك غير صالح", "", ""
             response = requests.get(url, headers=headers)
+            soup = BeautifulSoup(response.text, 'html.parser')
+            # استخراج العنوان
+            title_element = soup.find('h1')
+            title = title_element.text.strip() if title_element else "العنوان غير متوفر"
+            # استخراج الوصف
+            description = title  # الوصف موجود داخل نفس العنصر <h1>
+            # استخراج الهاشتاغات
+            hashtags_elements = soup.find_all('a', {'class': re.compile(r'.*hashtag.*')})
+            hashtags = [tag.text.strip() for tag in hashtags_elements]
+            return title, description, "\n".join(hashtags)
     except Exception as e:
         return f"حدث خطأ: {str(e)}", "", ""
         with gr.Row():
             url_input = gr.Textbox(
+                label="🔗 أدخل رابط يوتيوب/تيكتوك",
+                placeholder="https://.../video/..."
             )
         with gr.Row():
 if __name__ == "__main__":
     demo = gradio_interface()
+    demo.launch()