joermd committed
Commit 0a58343 · verified · 1 Parent(s): 3a8824c

Update app.py

Files changed (1):
  1. app.py +107 -265
app.py CHANGED
@@ -1,272 +1,114 @@
  import streamlit as st
- from streamlit_lottie import st_lottie
- from streamlit_option_menu import option_menu
  import requests
- import pandas as pd
- import plotly.express as px
- import plotly.graph_objects as go
- from datetime import datetime
- import httpx
- import asyncio
- import aiohttp
- from bs4 import BeautifulSoup
  import whois
- import ssl
  import socket
- import dns.resolver
- from urllib.parse import urlparse, urljoin
- import json
- import numpy as np
- from selenium import webdriver
- from selenium.webdriver.chrome.options import Options
- from webdriver_manager.chrome import ChromeDriverManager
- from PIL import Image
- import io
- import time
- import tldextract
- import requests_html
- from fake_useragent import UserAgent
- from concurrent.futures import ThreadPoolExecutor
- import re
- from urllib.robotparser import RobotFileParser
- import random
- from textblob import TextBlob
- from collections import Counter
- import networkx as nx
-
- # Initialize global constants
- TIMEOUT = 10
- MAX_RETRIES = 3
- COMMON_CRAWL_INDEX = 'https://index.commoncrawl.org/CC-MAIN-2023-50-index'
-
- class WebsiteAnalyzer:
-     def __init__(self):
-         self.ua = UserAgent()
-         self.session = requests.Session()
-         self.cache = {}
-
-     def _get_headers(self):
-         return {
-             'User-Agent': self.ua.random,
-             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-             'Accept-Language': 'en-US,en;q=0.5',
-             'Connection': 'keep-alive',
-         }
-
-     async def _fetch_with_retry(self, url, retries=MAX_RETRIES):
-         for i in range(retries):
-             try:
-                 async with httpx.AsyncClient(timeout=TIMEOUT) as client:
-                     response = await client.get(url, headers=self._get_headers())
-                     response.raise_for_status()
-                     return response
-             except Exception as e:
-                 if i == retries - 1:
-                     raise e
-                 await asyncio.sleep(1)
-
-     async def analyze_performance(self, url):
-         try:
-             performance_metrics = {
-                 'dns_lookup': [],
-                 'tcp_handshake': [],
-                 'ttfb': [],
-                 'content_download': []
-             }
-
-             # Analyze performance across different devices
-             devices = ['desktop', 'mobile', 'tablet']
-             device_metrics = {}
-
-             for device in devices:
-                 chrome_options = Options()
-                 chrome_options.add_argument(f"--user-agent={self._get_device_user_agent(device)}")
-                 start_time = time.time()
-
-                 # Measure performance for each device
-                 device_metrics[device] = {
-                     'load_time': time.time() - start_time,
-                     'render_time': self._measure_render_time(url, chrome_options)
-                 }
-
-             # General performance analysis
-             for _ in range(3):
-                 start_time = time.time()
-                 domain = urlparse(url).netloc
-                 dns_start = time.time()
-                 socket.gethostbyname(domain)
-                 performance_metrics['dns_lookup'].append(time.time() - dns_start)
-
-                 response = await self._fetch_with_retry(url)
-                 performance_metrics['ttfb'].append(response.elapsed.total_seconds())
-                 performance_metrics['content_download'].append(time.time() - start_time - response.elapsed.total_seconds())
-
-             # Analyze resources and optimization opportunities
-             soup = BeautifulSoup(response.text, 'html.parser')
-             resource_analysis = self._analyze_resources(soup, response.content)
-             optimization_suggestions = self._generate_optimization_suggestions(resource_analysis)
-
-             return {
-                 "أداء الموقع": {
-                     "تحليل الأجهزة": {
-                         device: {
-                             "زمن التحميل": f"{metrics['load_time']:.2f} ثانية",
-                             "زمن العرض": f"{metrics['render_time']:.2f} ثانية"
-                         } for device, metrics in device_metrics.items()
-                     },
-                     "تحليل الموارد": resource_analysis,
-                     "اقتراحات التحسين": optimization_suggestions
-                 }
-             }
-         except Exception as e:
-             return {"error": f"حدث خطأ أثناء تحليل الأداء: {str(e)}"}
-
-     def _analyze_resources(self, soup, content):
-         """Analyze page resources and identify optimization opportunities"""
-         resources = {
-             'images': self._analyze_images(soup),
-             'scripts': self._analyze_scripts(soup),
-             'styles': self._analyze_styles(soup),
-             'fonts': self._analyze_fonts(soup),
-             'total_size': len(content) / 1024
-         }
-         return resources
-
-     def _analyze_images(self, soup):
-         """Detailed image analysis"""
-         images = soup.find_all('img')
-         return {
-             'count': len(images),
-             'without_alt': len([img for img in images if not img.get('alt')]),
-             'large_images': len([img for img in images if self._is_large_image(img)]),
-             'optimization_needed': self._check_image_optimization(images)
-         }
-
-     def _analyze_competitors(self, url):
-         """Analyze competitors and benchmark against them"""
-         try:
-             competitors = self._find_competitors(url)
-             comparison = {}
-
-             for competitor in competitors:
-                 comparison[competitor] = {
-                     'traffic': self._estimate_traffic(competitor),
-                     'keywords': self._analyze_keywords(competitor),
-                     'backlinks': self._analyze_backlinks(competitor),
-                     'social_presence': self._analyze_social_presence(competitor)
-                 }
-
-             return {
-                 'المنافسون الرئيسيون': comparison,
-                 'تحليل مقارن': self._generate_competitive_analysis(comparison)
-             }
-         except Exception as e:
-             return {"error": f"خطأ في تحليل المنافسين: {str(e)}"}
-
-     def _analyze_content_quality(self, soup):
-         """Analyze content quality"""
-         text_content = soup.get_text()
-
-         # Linguistic analysis
-         blob = TextBlob(text_content)
-
-         # Readability analysis
-         readability = self._calculate_readability(text_content)
-
-         # Keyword analysis
-         keywords = self._extract_keywords(text_content)
-
-         return {
-             "تحليل المحتوى": {
-                 "مستوى القراءة": readability,
-                 "تنوع المفردات": self._calculate_lexical_diversity(text_content),
-                 "الكلمات المفتاحية الرئيسية": keywords[:10],
-                 "العاطفة": {
-                     "إيجابية": blob.sentiment.polarity,
-                     "موضوعية": blob.sentiment.subjectivity
-                 }
-             }
-         }
-
-     def _analyze_backlinks(self, url):
-         """Backlink analysis"""
-         try:
-             backlinks = self._fetch_backlinks(url)
-
-             # Analyze link quality
-             quality_metrics = self._analyze_backlink_quality(backlinks)
-
-             # Analyze source diversity
-             diversity = self._analyze_source_diversity(backlinks)
-
-             return {
-                 "تحليل الروابط الخلفية": {
-                     "العدد الإجمالي": len(backlinks),
-                     "جودة الروابط": quality_metrics,
-                     "تنوع المصادر": diversity,
-                     "أهم المصادر": self._get_top_referring_domains(backlinks)
-                 }
-             }
-         except Exception as e:
-             return {"error": f"خطأ في تحليل الروابط الخلفية: {str(e)}"}
-
-     def _analyze_social_signals(self, url):
-         """Analyze social signals"""
-         try:
-             social_metrics = {
-                 'facebook': self._get_facebook_shares(url),
-                 'twitter': self._get_twitter_shares(url),
-                 'linkedin': self._get_linkedin_shares(url),
-                 'pinterest': self._get_pinterest_shares(url)
-             }
-
-             engagement_analysis = self._analyze_social_engagement(social_metrics)
-
-             return {
-                 "التواجد الاجتماعي": {
-                     "إحصائيات المشاركة": social_metrics,
-                     "تحليل التفاعل": engagement_analysis,
-                     "توصيات": self._generate_social_recommendations(engagement_analysis)
-                 }
-             }
-         except Exception as e:
-             return {"error": f"خطأ في تحليل الإشارات الاجتماعية: {str(e)}"}
-
-     def _generate_comprehensive_report(self, url):
-         """Generate a comprehensive report"""
-         try:
-             report = {
-                 "تحليل الأداء": self.analyze_performance(url),
-                 "تحليل SEO": self.analyze_seo(url),
-                 "تحليل الأمان": self.analyze_security(url),
-                 "تحليل المنافسين": self._analyze_competitors(url),
-                 "تحليل المحتوى": self._analyze_content_quality(BeautifulSoup(requests.get(url).text, 'html.parser')),
-                 "تحليل الروابط": self._analyze_backlinks(url),
-                 "التواجد الاجتماعي": self._analyze_social_signals(url),
-                 "التوصيات": self._generate_recommendations()
-             }
-
-             return report
-         except Exception as e:
-             return {"error": f"خطأ في توليد التقرير الشامل: {str(e)}"}
-
-     def _generate_recommendations(self):
-         """Generate tailored recommendations"""
-         recommendations = {
-             "تحسينات عاجلة": [],
-             "تحسينات متوسطة الأولوية": [],
-             "تحسينات طويلة المدى": []
-         }
-
-         # Add recommendations based on the analysis results
-         return recommendations
 
- # Usage example
- async def main():
-     analyzer = WebsiteAnalyzer()
-     url = "https://example.com"
-     report = await analyzer._generate_comprehensive_report(url)
-     print(json.dumps(report, ensure_ascii=False, indent=2))
+ import ssl
+ from bs4 import BeautifulSoup
+ from datetime import datetime
+
+ def get_ssl_expiry_date(hostname):
+     try:
+         context = ssl.create_default_context()
+         with socket.create_connection((hostname, 443)) as sock:
+             with context.wrap_socket(sock, server_hostname=hostname) as ssock:
+                 ssl_info = ssock.getpeercert()
+                 expire_date = datetime.strptime(ssl_info['notAfter'], '%b %d %H:%M:%S %Y %Z')
+                 days_left = (expire_date - datetime.utcnow()).days
+                 return days_left
+     except Exception as e:
+         return None
+
+ def main():
+     st.title("Website Analysis Tool")
+
+     url = st.text_input("Enter the website URL (e.g., https://www.example.com)")
+
+     if st.button("Analyze") and url:
+         if not url.startswith("http"):
+             url = "http://" + url
+
+         try:
+             response = requests.get(url)
+             status_code = response.status_code
+
+             # Basic SEO Analysis
+             soup = BeautifulSoup(response.content, 'html.parser')
+             title = soup.title.string if soup.title else "No title tag found"
+             meta_desc = soup.find('meta', attrs={'name': 'description'})
+             meta_desc_content = meta_desc['content'] if meta_desc else "No meta description found"
+
+             # WHOIS Information
+             domain = url.replace("http://", "").replace("https://", "").split('/')[0]
+             domain_info = whois.whois(domain)
+
+             # SSL Certificate Check
+             ssl_days_left = get_ssl_expiry_date(domain)
+             if ssl_days_left is not None:
+                 ssl_status = f"SSL Certificate expires in {ssl_days_left} days"
+             else:
+                 ssl_status = "No SSL Certificate found"
+
+             # Security Headers Check
+             security_score = 0
+             headers = response.headers
+
+             if 'X-Frame-Options' in headers:
+                 security_score += 10
+             if 'X-Content-Type-Options' in headers:
+                 security_score += 10
+             if 'Content-Security-Policy' in headers:
+                 security_score += 10
+             if 'Strict-Transport-Security' in headers:
+                 security_score += 10
+             if 'Referrer-Policy' in headers:
+                 security_score += 10
+
+             # Overall Score Calculation
+             total_score = security_score
+             if title != "No title tag found":
+                 total_score += 20
+             if meta_desc_content != "No meta description found":
+                 total_score += 20
+             if ssl_days_left is not None:
+                 total_score += 20
+
+             st.subheader("SEO Analysis")
+             st.write(f"**Title Tag:** {title}")
+             st.write(f"**Meta Description:** {meta_desc_content}")
+
+             st.subheader("Security Analysis")
+             st.write(f"**SSL Status:** {ssl_status}")
+             st.write("**Security Headers:**")
+             for header in ['X-Frame-Options', 'X-Content-Type-Options', 'Content-Security-Policy',
+                            'Strict-Transport-Security', 'Referrer-Policy']:
+                 if header in headers:
+                     st.write(f"- {header}: {headers[header]}")
+                 else:
+                     st.write(f"- {header}: Not Found")
+
+             st.subheader("WHOIS Information")
+             st.write(f"**Domain Name:** {domain_info.domain_name}")
+             st.write(f"**Registrar:** {domain_info.registrar}")
+             st.write(f"**Creation Date:** {domain_info.creation_date}")
+             st.write(f"**Expiration Date:** {domain_info.expiration_date}")
+
+             st.subheader("Overall Score")
+             st.write(f"**Total Score:** {total_score} / 100")
+
+             st.subheader("Suggestions for Improvement")
+             if title == "No title tag found":
+                 st.write("- Add a title tag to your homepage.")
+             if meta_desc_content == "No meta description found":
+                 st.write("- Add a meta description to your homepage.")
+             if ssl_days_left is None:
+                 st.write("- Install an SSL certificate to secure your site with HTTPS.")
+             for header in ['X-Frame-Options', 'X-Content-Type-Options', 'Content-Security-Policy',
+                            'Strict-Transport-Security', 'Referrer-Policy']:
+                 if header not in headers:
+                     st.write(f"- Add the {header} header to improve security.")
+
+         except Exception as e:
+             st.error(f"An error occurred: {e}")
 
  if __name__ == "__main__":
-     asyncio.run(main())
+     main()
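
For a quick sanity check outside Streamlit, the committed get_ssl_expiry_date helper can be exercised standalone. The harness below is a minimal sketch and not part of the commit; it assumes outbound network access on port 443, and the file name check_ssl.py and the example.com hostname are placeholders:

# check_ssl.py: hypothetical standalone harness for the SSL-expiry helper
import socket
import ssl
from datetime import datetime

def get_ssl_expiry_date(hostname):
    # Same logic as in app.py: open a TLS connection on port 443, read the
    # peer certificate, and count the days until its notAfter timestamp.
    try:
        context = ssl.create_default_context()
        with socket.create_connection((hostname, 443)) as sock:
            with context.wrap_socket(sock, server_hostname=hostname) as ssock:
                not_after = ssock.getpeercert()['notAfter']
                expire_date = datetime.strptime(not_after, '%b %d %H:%M:%S %Y %Z')
                return (expire_date - datetime.utcnow()).days
    except Exception:
        # Unreachable host, no TLS on 443, handshake failure, etc.
        return None

if __name__ == "__main__":
    print(get_ssl_expiry_date("example.com"))  # e.g. 123, or None on failure

The app itself starts the usual way for a Streamlit script: streamlit run app.py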