KIMOSSINO committed on
Commit
a568782
·
verified ·
1 Parent(s): 4becc5b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from collections import Counter
3
+ from bs4 import BeautifulSoup
4
+ import requests
5
+ import re
6
+ from urllib.parse import urlparse
7
+ import json
8
+ from youtube_transcript_api import YouTubeTranscriptApi
9
+
10
def extract_youtube_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    The ``v=`` query parameter is preferred (``watch?v=<id>``); otherwise the
    first path segment that is exactly 11 ID characters long is used, which
    covers ``youtu.be/<id>``, ``/embed/<id>``, ``/shorts/<id>`` and similar.

    Args:
        url: The URL (or URL fragment) to inspect.

    Returns:
        The video ID as a string, or ``None`` when no ID is found or *url*
        is empty.
    """
    if not url:
        return None

    id_chars = r'[0-9A-Za-z_-]'
    # The negative lookahead requires the ID to be exactly 11 characters, so
    # longer segments such as "/feed/subscriptions" or a 24-character channel
    # ID are not truncated into a bogus video ID.
    exact_id = rf'({id_chars}{{11}})(?!{id_chars})'

    if match := re.search(rf'(?:^|[?&])v={exact_id}', url):
        return match.group(1)
    if match := re.search(rf'/{exact_id}', url):
        return match.group(1)
    return None
15
+
16
def extract_tiktok_id(url):
    """Extract the numeric TikTok video ID from a URL.

    Looks for the digits that follow ``video/`` anywhere in *url*.

    Returns:
        The ID as a string of digits, or ``None`` if the pattern is absent.
    """
    found = re.search(r'video/(\d+)', url)
    return found.group(1) if found else None
21
+
22
def extract_instagram_shortcode(url):
    """Extract the Instagram post shortcode from a URL.

    Matches the path segment that follows ``/p/`` or ``/reel/``.

    Returns:
        The shortcode string, or ``None`` if *url* has no such segment.
    """
    found = re.search(r'/(p|reel)/([A-Za-z0-9_-]+)', url)
    if found is None:
        return None
    # Group 1 is the post kind ("p" or "reel"); group 2 is the shortcode.
    _kind, shortcode = found.groups()
    return shortcode
27
+
28
def get_hashtags_from_text(text):
    """Collect the hashtags that appear in *text*.

    A hashtag is ``#`` followed by one or more word characters. Matches are
    returned in order of appearance, including the leading ``#``; duplicates
    are kept.
    """
    return [hit.group() for hit in re.finditer(r'#\w+', text)]
31
+
32
def _host_matches(host, *domains):
    """Return True when *host* equals one of *domains* or is a subdomain of it."""
    return any(host == d or host.endswith('.' + d) for d in domains)


def _fetch_og_metadata(page_url, headers, timeout=10):
    """Fetch *page_url* and read its Open Graph title and description.

    Returns:
        ``(title, description, hashtags)`` where hashtags are newline-joined,
        or ``None`` when the response is not HTTP 200 or the page carries
        neither ``og:title`` nor ``og:description``.
    """
    # A timeout keeps a stalled remote server from blocking the handler forever.
    response = requests.get(page_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    def og_content(prop):
        # soup.find returns None when the tag is missing; treat that as empty
        # rather than letting a subscript on None raise TypeError.
        tag = soup.find('meta', property=prop)
        return (tag.get('content') or '') if tag else ''

    title = og_content('og:title')
    description = og_content('og:description')
    if not title and not description:
        return None
    return title, description, "\n".join(get_hashtags_from_text(description))


def extract_from_url(url):
    """Extract title, description and hashtags from a social-media link.

    Supports YouTube, TikTok and Instagram URLs. The page is fetched and its
    Open Graph ``og:title`` / ``og:description`` meta tags are read; hashtags
    are taken from the description.

    Args:
        url: Link entered by the user. A missing scheme defaults to https.

    Returns:
        A 3-tuple of strings ``(title, description, hashtags)``. On failure
        the first element holds a user-facing message and the other two are
        empty strings.
    """
    not_found = ("لم يتم العثور على بيانات", "", "")

    url = (url or "").strip()
    if not url:
        return not_found
    if '://' not in url:
        # urlparse leaves netloc empty for scheme-less input such as
        # "www.youtube.com/watch?v=...", so the host would never be recognised.
        url = f"https://{url}"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # .hostname is lower-cased and excludes any port or credentials.
        # Parsing happens inside the try because urlparse can raise ValueError
        # on malformed input.
        host = urlparse(url).hostname or ""

        # Match on exact host or subdomain, not substring, so a host like
        # "tiktok.com.evil.example" is not fetched on the user's behalf.
        if _host_matches(host, 'youtube.com', 'youtu.be'):
            video_id = extract_youtube_id(url)
            if not video_id:
                return "رابط يوتيوب غير صالح", "", ""
            # Always fetch the canonical watch page, whatever link form was given.
            target = f"https://www.youtube.com/watch?v={video_id}"

        elif _host_matches(host, 'tiktok.com'):
            if not extract_tiktok_id(url):
                return "رابط تيكتوك غير صالح", "", ""
            target = url

        elif _host_matches(host, 'instagram.com'):
            if not extract_instagram_shortcode(url):
                return "رابط انستغرام غير صالح", "", ""
            target = url

        else:
            return not_found

        result = _fetch_og_metadata(target, headers)
        if result is not None:
            return result

    except Exception as e:
        # UI boundary: report any network or parsing failure as a message in
        # the title field instead of raising into the Gradio handler.
        return f"حدث خطأ: {str(e)}", "", ""

    return not_found
89
+
90
def gradio_interface():
    """Build the Gradio Blocks UI for the link analyzer.

    The layout is a heading, a URL text box, an analyze button and three
    read-only text boxes (title, description, hashtags), each in its own row.
    Clicking the button calls ``extract_from_url`` with the URL and writes its
    three return values to the output boxes.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    def readonly_box(label, lines):
        # All three result fields share the same non-editable configuration.
        return gr.Textbox(label=label, lines=lines, interactive=False)

    with gr.Blocks() as demo:
        gr.Markdown("## 📱 محلل روابط وسائل التواصل الاجتماعي")

        with gr.Row():
            link_box = gr.Textbox(
                label="🔗 أدخل رابط يوتيوب/تيكتوك/انستغرام",
                placeholder="https://...",
            )

        with gr.Row():
            run_button = gr.Button("تحليل الرابط", variant="primary")

        with gr.Row():
            title_box = readonly_box("📝 العنوان", 2)

        with gr.Row():
            description_box = readonly_box("📄 الوصف", 5)

        with gr.Row():
            hashtags_box = readonly_box("🏷️ الهاشتاغات", 5)

        result_boxes = [title_box, description_box, hashtags_box]
        run_button.click(
            fn=extract_from_url,
            inputs=[link_box],
            outputs=result_boxes,
        )

    return demo
131
+
132
if __name__ == "__main__":
    # Build the Blocks app and start the Gradio server only when this file is
    # run as a script, not when it is imported.
    demo = gradio_interface()
    demo.launch()