Update app.py
app.py
CHANGED
@@ -1,272 +1,114 @@
import streamlit as st
-from streamlit_lottie import st_lottie
-from streamlit_option_menu import option_menu
import requests
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from datetime import datetime
-import httpx
-import asyncio
-import aiohttp
-from bs4 import BeautifulSoup
import whois
-import ssl
import socket
-import
-from urllib.parse import urlparse
-import
-import numpy as np
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from webdriver_manager.chrome import ChromeDriverManager
-from PIL import Image
-import io
-import time
-import tldextract
-import requests_html
-from fake_useragent import UserAgent
-from concurrent.futures import ThreadPoolExecutor
-import re
-from urllib.robotparser import RobotFileParser
-import random
-from textblob import TextBlob
-from collections import Counter
-import networkx as nx
-
-# Global variables
-TIMEOUT = 10
-MAX_RETRIES = 3
-COMMON_CRAWL_INDEX = 'https://index.commoncrawl.org/CC-MAIN-2023-50-index'
-
-class WebsiteAnalyzer:
-    def __init__(self):
-        self.ua = UserAgent()
-        self.session = requests.Session()
-        self.cache = {}
-
-    def _get_headers(self):
-        return {
-            'User-Agent': self.ua.random,
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-            'Accept-Language': 'en-US,en;q=0.5',
-            'Connection': 'keep-alive',
-        }
-
-    async def _fetch_with_retry(self, url, retries=MAX_RETRIES):
-        for i in range(retries):
-            try:
-                async with httpx.AsyncClient(timeout=TIMEOUT) as client:
-                    response = await client.get(url, headers=self._get_headers())
-                    response.raise_for_status()
-                    return response
-            except Exception as e:
-                if i == retries - 1:
-                    raise e
-                await asyncio.sleep(1)
-
-    async def analyze_performance(self, url):
-        try:
-            performance_metrics = {
-                'dns_lookup': [],
-                'tcp_handshake': [],
-                'ttfb': [],
-                'content_download': []
-            }
-
-            # Performance analysis across different devices
-            devices = ['desktop', 'mobile', 'tablet']
-            device_metrics = {}
-
-            for device in devices:
-                chrome_options = Options()
-                chrome_options.add_argument(f"--user-agent={self._get_device_user_agent(device)}")
-                start_time = time.time()
-
-                # Measure performance for each device
-                device_metrics[device] = {
-                    'load_time': time.time() - start_time,
-                    'render_time': self._measure_render_time(url, chrome_options)
-                }
-
-            # General performance analysis
-            for _ in range(3):
-                start_time = time.time()
-                domain = urlparse(url).netloc
-                dns_start = time.time()
-                socket.gethostbyname(domain)
-                performance_metrics['dns_lookup'].append(time.time() - dns_start)
-
-                response = await self._fetch_with_retry(url)
-                performance_metrics['ttfb'].append(response.elapsed.total_seconds())
-                performance_metrics['content_download'].append(time.time() - start_time - response.elapsed.total_seconds())
-
-            # Analyze resources and possible optimizations
-            soup = BeautifulSoup(response.text, 'html.parser')
-            resource_analysis = self._analyze_resources(soup, response.content)
-            optimization_suggestions = self._generate_optimization_suggestions(resource_analysis)
-
-            return {
-                "Site performance": {
-                    "Device analysis": {
-                        device: {
-                            "Load time": f"{metrics['load_time']:.2f} seconds",
-                            "Render time": f"{metrics['render_time']:.2f} seconds"
-                        } for device, metrics in device_metrics.items()
-                    },
-                    "Resource analysis": resource_analysis,
-                    "Optimization suggestions": optimization_suggestions
-                }
-            }
-        except Exception as e:
-            return {"error": f"An error occurred while analyzing performance: {str(e)}"}
-
-    def _analyze_resources(self, soup, content):
-        """Analyze page resources and identify optimization opportunities"""
-        resources = {
-            'images': self._analyze_images(soup),
-            'scripts': self._analyze_scripts(soup),
-            'styles': self._analyze_styles(soup),
-            'fonts': self._analyze_fonts(soup),
-            'total_size': len(content) / 1024
-        }
-        return resources
-
-    def _analyze_images(self, soup):
-        """Detailed image analysis"""
-        images = soup.find_all('img')
-        return {
-            'count': len(images),
-            'without_alt': len([img for img in images if not img.get('alt')]),
-            'large_images': len([img for img in images if self._is_large_image(img)]),
-            'optimization_needed': self._check_image_optimization(images)
-        }
-
-    def _analyze_competitors(self, url):
-        """Analyze competitors and compare against them"""
-        try:
-            competitors = self._find_competitors(url)
-            comparison = {}
-
-            for competitor in competitors:
-                comparison[competitor] = {
-                    'traffic': self._estimate_traffic(competitor),
-                    'keywords': self._analyze_keywords(competitor),
-                    'backlinks': self._analyze_backlinks(competitor),
-                    'social_presence': self._analyze_social_presence(competitor)
-                }
-
-            return {
-                'Main competitors': comparison,
-                'Comparative analysis': self._generate_competitive_analysis(comparison)
-            }
-        except Exception as e:
-            return {"error": f"Error in competitor analysis: {str(e)}"}
-
-    def _analyze_content_quality(self, soup):
-        """Analyze content quality"""
-        text_content = soup.get_text()
-
-        # Linguistic analysis
-        blob = TextBlob(text_content)
-
-        # Readability analysis
-        readability = self._calculate_readability(text_content)
-
-        # Keyword analysis
-        keywords = self._extract_keywords(text_content)
-
-        return {
-            "Content analysis": {
-                "Reading level": readability,
-                "Lexical diversity": self._calculate_lexical_diversity(text_content),
-                "Top keywords": keywords[:10],
-                "Sentiment": {
-                    "Polarity": blob.sentiment.polarity,
-                    "Subjectivity": blob.sentiment.subjectivity
-                }
-            }
-        }
-
-    def _analyze_backlinks(self, url):
-        """Analyze backlinks"""
-        try:
-            backlinks = self._fetch_backlinks(url)
-
-            # Assess link quality
-            quality_metrics = self._analyze_backlink_quality(backlinks)
-
-            # Assess source diversity
-            diversity = self._analyze_source_diversity(backlinks)
-
-            return {
-                "Backlink analysis": {
-                    "Total count": len(backlinks),
-                    "Link quality": quality_metrics,
-                    "Source diversity": diversity,
-                    "Top referring domains": self._get_top_referring_domains(backlinks)
-                }
-            }
-        except Exception as e:
-            return {"error": f"Error in backlink analysis: {str(e)}"}
-
-    def _analyze_social_signals(self, url):
-        """Analyze social signals"""
-        try:
-            social_metrics = {
-                'facebook': self._get_facebook_shares(url),
-                'twitter': self._get_twitter_shares(url),
-                'linkedin': self._get_linkedin_shares(url),
-                'pinterest': self._get_pinterest_shares(url)
-            }
-
-            engagement_analysis = self._analyze_social_engagement(social_metrics)
-
-            return {
-                "Social presence": {
-                    "Share statistics": social_metrics,
-                    "Engagement analysis": engagement_analysis,
-                    "Recommendations": self._generate_social_recommendations(engagement_analysis)
-                }
-            }
-        except Exception as e:
-            return {"error": f"Error in social signal analysis: {str(e)}"}
-
-    def _generate_comprehensive_report(self, url):
-        """Generate a comprehensive report"""
-        try:
-            report = {
-                "Performance analysis": self.analyze_performance(url),
-                "SEO analysis": self.analyze_seo(url),
-                "Security analysis": self.analyze_security(url),
-                "Competitor analysis": self._analyze_competitors(url),
-                "Content analysis": self._analyze_content_quality(BeautifulSoup(requests.get(url).text, 'html.parser')),
-                "Link analysis": self._analyze_backlinks(url),
-                "Social presence": self._analyze_social_signals(url),
-                "Recommendations": self._generate_recommendations()
-            }
-
-            return report
-        except Exception as e:
-            return {"error": f"Error generating the comprehensive report: {str(e)}"}
-
-    def _generate_recommendations(self):
-        """Generate tailored recommendations"""
-        recommendations = {
-            "Urgent improvements": [],
-            "Medium-priority improvements": [],
-            "Long-term improvements": []
-        }
-
-        # Add recommendations based on analysis results
-        return recommendations

-
-
-
-
-
-
+import ssl
+from bs4 import BeautifulSoup
+from datetime import datetime

+def get_ssl_expiry_date(hostname):
+    # Return the number of days until the host's TLS certificate expires,
+    # or None if the certificate cannot be retrieved.
+    try:
+        context = ssl.create_default_context()
+        with socket.create_connection((hostname, 443)) as sock:
+            with context.wrap_socket(sock, server_hostname=hostname) as ssock:
+                ssl_info = ssock.getpeercert()
+                expire_date = datetime.strptime(ssl_info['notAfter'], '%b %d %H:%M:%S %Y %Z')
+                days_left = (expire_date - datetime.utcnow()).days
+                return days_left
+    except Exception:
+        return None
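+# Note: getpeercert() reports 'notAfter' in a fixed C-locale format such as
+# 'Jun  1 12:00:00 2025 GMT', which the strptime pattern above parses.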
+
+def main():
+    st.title("Website Analysis Tool")
+
+    url = st.text_input("Enter the website URL (e.g., https://www.example.com)")
+
+    if st.button("Analyze") and url:
+        # Default to http:// when the user omits the scheme
+        if not url.startswith("http"):
+            url = "http://" + url
+
+        try:
+            response = requests.get(url)
+            status_code = response.status_code
+
+            # Basic SEO Analysis
+            soup = BeautifulSoup(response.content, 'html.parser')
+            title = soup.title.string if soup.title else "No title tag found"
+            meta_desc = soup.find('meta', attrs={'name': 'description'})
+            meta_desc_content = meta_desc['content'] if meta_desc else "No meta description found"
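+            # Note: meta_desc['content'] assumes the tag carries a content
+            # attribute; meta_desc.get('content') would be the defensive form.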
+
+            # WHOIS Information
+            domain = url.replace("http://", "").replace("https://", "").split('/')[0]
+            domain_info = whois.whois(domain)
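+            # python-whois may return lists (e.g., several creation dates) or
+            # None for some fields, depending on the registry; values are shown as-is.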
+
+            # SSL Certificate Check
+            ssl_days_left = get_ssl_expiry_date(domain)
+            if ssl_days_left is not None:
+                ssl_status = f"SSL Certificate expires in {ssl_days_left} days"
+            else:
+                ssl_status = "No SSL Certificate found"
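+            # get_ssl_expiry_date returns None on any failure, so this status
+            # can also mean the host was unreachable on port 443.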
+
+            # Security Headers Check
+            security_score = 0
+            headers = response.headers
+
+            if 'X-Frame-Options' in headers:
+                security_score += 10
+            if 'X-Content-Type-Options' in headers:
+                security_score += 10
+            if 'Content-Security-Policy' in headers:
+                security_score += 10
+            if 'Strict-Transport-Security' in headers:
+                security_score += 10
+            if 'Referrer-Policy' in headers:
+                security_score += 10
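+            # Five headers at 10 points each: the header sub-score maxes out at 50.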
+
+            # Overall Score Calculation
+            total_score = security_score
+            if title != "No title tag found":
+                total_score += 20
+            if meta_desc_content != "No meta description found":
+                total_score += 20
+            if ssl_days_left is not None:
+                total_score += 20
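+            # Combined with the 50 header points, the maximum total is 110.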
+
+            st.subheader("SEO Analysis")
+            st.write(f"**Title Tag:** {title}")
+            st.write(f"**Meta Description:** {meta_desc_content}")
+
+            st.subheader("Security Analysis")
+            st.write(f"**SSL Status:** {ssl_status}")
+            st.write("**Security Headers:**")
+            for header in ['X-Frame-Options', 'X-Content-Type-Options', 'Content-Security-Policy',
+                           'Strict-Transport-Security', 'Referrer-Policy']:
+                if header in headers:
+                    st.write(f"- {header}: {headers[header]}")
+                else:
+                    st.write(f"- {header}: Not Found")
+
+            st.subheader("WHOIS Information")
+            st.write(f"**Domain Name:** {domain_info.domain_name}")
+            st.write(f"**Registrar:** {domain_info.registrar}")
+            st.write(f"**Creation Date:** {domain_info.creation_date}")
+            st.write(f"**Expiration Date:** {domain_info.expiration_date}")
+
+            st.subheader("Overall Score")
+            st.write(f"**Total Score:** {total_score} / 110")
+
+            st.subheader("Suggestions for Improvement")
+            if title == "No title tag found":
+                st.write("- Add a title tag to your homepage.")
+            if meta_desc_content == "No meta description found":
+                st.write("- Add a meta description to your homepage.")
+            if ssl_days_left is None:
+                st.write("- Install an SSL certificate to secure your site with HTTPS.")
+            for header in ['X-Frame-Options', 'X-Content-Type-Options', 'Content-Security-Policy',
+                           'Strict-Transport-Security', 'Referrer-Policy']:
+                if header not in headers:
+                    st.write(f"- Add the {header} header to improve security.")
+
+        except Exception as e:
+            st.error(f"An error occurred: {e}")

if __name__ == "__main__":
-
+    main()
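To try the updated app, assuming the imports map to the usual PyPI packages (streamlit, requests, python-whois, beautifulsoup4), install those with pip and launch it with `streamlit run app.py`.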