import streamlit as st
from streamlit_option_menu import option_menu
import requests
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import httpx
import asyncio
import aiohttp
from bs4 import BeautifulSoup
import whois
import ssl
import socket
import dns.resolver
from urllib.parse import urlparse
import json
import numpy as np
from PIL import Image
import io
import time
import matplotlib.pyplot as plt
import seaborn as sns
import tldextract
from concurrent.futures import ThreadPoolExecutor
import re
from collections import Counter
from wordcloud import WordCloud
import advertools as adv


st.set_page_config(
    layout="wide",
    page_title="محلل المواقع المتقدم | Website Analyzer Pro",
    page_icon="🔍",
    initial_sidebar_state="expanded"
)

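# Global CSS for the dashboard: loads the Tajawal font and styles the metric
# cards, buttons, inputs, expanders, and chart containers used throughout the app.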
st.markdown("""
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Tajawal:wght@400;500;700&display=swap');

    * {
        font-family: 'Tajawal', sans-serif;
    }

    .main {
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        padding: 20px;
    }

    .metric-card {
        background: white;
        border-radius: 15px;
        padding: 20px;
        box-shadow: 0 4px 15px rgba(0,0,0,0.1);
        transition: all 0.3s ease;
        margin-bottom: 20px;
    }

    .metric-card:hover {
        transform: translateY(-5px);
        box-shadow: 0 8px 25px rgba(0,0,0,0.15);
    }

    .metric-value {
        font-size: 2em;
        font-weight: bold;
        color: #2196F3;
    }

    .metric-label {
        color: #666;
        font-size: 1.1em;
    }

    .stButton>button {
        background: linear-gradient(45deg, #2196F3, #21CBF3);
        color: white;
        border-radius: 25px;
        padding: 15px 30px;
        border: none;
        box-shadow: 0 4px 15px rgba(33,150,243,0.3);
        transition: all 0.3s ease;
        font-size: 1.1em;
        font-weight: 500;
        width: 100%;
    }

    .stButton>button:hover {
        transform: translateY(-2px);
        box-shadow: 0 6px 20px rgba(33,150,243,0.4);
    }

    h1, h2, h3 {
        color: #1E3D59;
        font-weight: 700;
    }

    .stTextInput>div>div>input {
        border-radius: 10px;
        border: 2px solid #E0E0E0;
        padding: 12px;
        font-size: 1.1em;
        transition: all 0.3s ease;
    }

    .stTextInput>div>div>input:focus {
        border-color: #2196F3;
        box-shadow: 0 0 0 2px rgba(33,150,243,0.2);
    }

    .streamlit-expanderHeader {
        background-color: white;
        border-radius: 10px;
        padding: 10px;
        box-shadow: 0 2px 8px rgba(0,0,0,0.1);
    }

    .stProgress > div > div > div {
        background-color: #2196F3;
    }

    .tab-content {
        padding: 20px;
        background: white;
        border-radius: 15px;
        box-shadow: 0 4px 15px rgba(0,0,0,0.1);
    }

    .insight-card {
        background: #f8f9fa;
        border-right: 4px solid #2196F3;
        padding: 15px;
        margin: 10px 0;
        border-radius: 8px;
    }

    .chart-container {
        background: white;
        padding: 20px;
        border-radius: 15px;
        box-shadow: 0 4px 15px rgba(0,0,0,0.1);
        margin: 20px 0;
    }
    </style>
""", unsafe_allow_html=True)


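# Core analyzer class. Each analyze_* method returns a dict of metrics keyed by
# Arabic display labels; the _calculate_*_score helpers start at 100 and subtract
# capped penalties, and the _get_*_recommendations helpers return lists of
# {المشكلة, الحل, الأولوية} entries for the UI.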
class AdvancedWebsiteAnalyzer:
    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        self.history = self.load_history()

    def load_history(self):
        try:
            return pd.read_csv('analysis_history.csv')
        except (FileNotFoundError, pd.errors.EmptyDataError):
            return pd.DataFrame(columns=['url', 'timestamp', 'performance_score', 'seo_score', 'security_score'])

    def save_history(self, data):
        # DataFrame.append was removed in pandas 2.x; use pd.concat instead.
        self.history = pd.concat([self.history, pd.DataFrame([data])], ignore_index=True)
        self.history.to_csv('analysis_history.csv', index=False)

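    # analyze_performance and analyze_seo are coroutines; the Streamlit layer is
    # expected to drive them with something like
    # asyncio.run(analyzer.analyze_performance(url)). This is an assumption,
    # since the UI code sits outside this section.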
    async def analyze_performance(self, url):
        try:
            start_time = time.time()
            # httpx does not follow redirects by default, so enable it and send
            # the analyzer's User-Agent header.
            async with httpx.AsyncClient(headers=self.headers, follow_redirects=True) as client:
                response = await client.get(url)
                load_time = time.time() - start_time
                page_size = len(response.content) / 1024  # KB

                soup = BeautifulSoup(response.text, 'html.parser')
                images = soup.find_all('img')
                scripts = soup.find_all('script')
                css_files = soup.find_all('link', {'rel': 'stylesheet'})

                performance_metrics = {
                    "زمن التحميل": round(load_time, 2),
                    "حجم الصفحة": round(page_size, 2),
                    "حالة الاستجابة": response.status_code,
                    "عدد الصور": len(images),
                    "عدد ملفات JavaScript": len(scripts),
                    "عدد ملفات CSS": len(css_files),
                    "تقييم الأداء": self._calculate_performance_score(load_time, page_size, len(images), len(scripts)),
                    "توصيات التحسين": self._get_performance_recommendations(load_time, page_size, len(images), len(scripts))
                }

                resources_analysis = await self._analyze_resources(url)
                performance_metrics.update(resources_analysis)

                return performance_metrics
        except Exception as e:
            return {"error": f"خطأ في تحليل الأداء: {str(e)}"}

    async def _analyze_resources(self, url):
        try:
            async with httpx.AsyncClient(headers=self.headers, follow_redirects=True) as client:
                response = await client.get(url)
                soup = BeautifulSoup(response.text, 'html.parser')

                # Sample only the first five images to keep the analysis fast.
                images = soup.find_all('img')
                image_sizes = []
                for img in images[:5]:
                    if img.get('src'):
                        try:
                            # Resolve relative src values against the page URL.
                            img_url = str(response.url.join(img['src']))
                            img_response = await client.get(img_url)
                            image_sizes.append(len(img_response.content) / 1024)
                        except Exception:
                            continue

                return {
                    "تحليل الموارد": {
                        "متوسط حجم الصور": round(np.mean(image_sizes), 2) if image_sizes else 0,
                        "عدد الموارد الخارجية": len(soup.find_all(['script', 'link', 'img'])),
                        "توصيات تحسين الموارد": self._get_resource_recommendations(image_sizes)
                    }
                }
        except Exception as e:
            return {"error": f"خطأ في تحليل الموارد: {str(e)}"}

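    # Scoring heuristic: start at 100 and subtract capped penalties for slow
    # load times (>2 s), heavy pages (>1000 KB), many images (>10) and many
    # scripts (>5); the result is clamped to the 0-100 range.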
    def _calculate_performance_score(self, load_time, page_size, image_count, script_count):
        score = 100

        if load_time > 2:
            score -= min(30, (load_time - 2) * 10)

        if page_size > 1000:
            score -= min(20, (page_size - 1000) / 100)

        if image_count > 10:
            score -= min(15, (image_count - 10) * 1.5)

        if script_count > 5:
            score -= min(15, (script_count - 5) * 2)

        return max(0, round(score))

    def _get_performance_recommendations(self, load_time, page_size, image_count, script_count):
        recommendations = []

        if load_time > 2:
            recommendations.append({
                "المشكلة": "بطء زمن التحميل",
                "الحل": "تحسين سرعة الخادم وتفعيل التخزين المؤقت",
                "الأولوية": "عالية"
            })

        if page_size > 1000:
            recommendations.append({
                "المشكلة": "حجم الصفحة كبير",
                "الحل": "ضغط الملفات وتحسين الكود",
                "الأولوية": "متوسطة"
            })

        if image_count > 10:
            recommendations.append({
                "المشكلة": "عدد كبير من الصور",
                "الحل": "تحسين حجم الصور واستخدام التحميل الكسول",
                "الأولوية": "متوسطة"
            })

        if script_count > 5:
            recommendations.append({
                "المشكلة": "عدد كبير من ملفات JavaScript",
                "الحل": "دمج وضغط ملفات JavaScript",
                "الأولوية": "عالية"
            })

        return recommendations if recommendations else [{"المشكلة": "لا توجد مشاكل", "الحل": "الأداء جيد!", "الأولوية": "منخفضة"}]

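    # analyze_seo delegates to per-aspect helpers (_analyze_title,
    # _analyze_description, _extract_keywords, _analyze_headings, _analyze_links,
    # _calculate_seo_score, _get_seo_recommendations). Only _analyze_content and
    # the readability/keyword-density helpers appear in this section; the rest
    # are assumed to be defined elsewhere in the class.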
    async def analyze_seo(self, url):
        try:
            async with httpx.AsyncClient(headers=self.headers, follow_redirects=True) as client:
                response = await client.get(url)
                soup = BeautifulSoup(response.text, 'html.parser')

                content_analysis = self._analyze_content(soup)
                links_analysis = self._analyze_links(soup)
                keywords_analysis = self._extract_keywords(soup)

                seo_analysis = {
                    "تحليل العنوان": self._analyze_title(soup),
                    "تحليل الوصف": self._analyze_description(soup),
                    "تحليل الكلمات المفتاحية": keywords_analysis,
                    "تحليل العناوين": self._analyze_headings(soup),
                    "تحليل الروابط": links_analysis,
                    "تحليل المحتوى": content_analysis,
                    "تقييم SEO": self._calculate_seo_score(soup),
                    "توصيات تحسين SEO": self._get_seo_recommendations(soup)
                }

                return seo_analysis
        except Exception as e:
            return {"error": f"خطأ في تحليل SEO: {str(e)}"}

    def _analyze_content(self, soup):
        # Collect the visible paragraph text and derive basic content metrics.
        text_content = ' '.join([p.text for p in soup.find_all('p')])

        word_count = len(text_content.split())
        readability_score = self._calculate_readability(text_content)
        keyword_density = self._calculate_keyword_density(text_content)

        return {
            "عدد الكلمات": word_count,
            "مستوى القراءة": readability_score,
            "كثافة الكلمات المفتاحية": keyword_density,
            "التقييم": "ممتاز" if word_count > 300 and readability_score > 60 else "يحتاج تحسين"
        }

    def _calculate_readability(self, text):
        # Rough readability proxy based on average sentence length, scaled to 0-100.
        # Empty fragments from the split are dropped so trailing punctuation does
        # not inflate the sentence count.
        sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
        words = len(text.split())
        if not sentences:
            return 0
        return min(100, round((words / len(sentences)) * 10))

    def _calculate_keyword_density(self, text):
        words = text.lower().split()
        word_freq = Counter(words)
        total_words = len(words)

        if total_words == 0:
            return {}

        # Density (as a percentage) of the five most frequent words.
        return {word: round((count / total_words) * 100, 2)
                for word, count in word_freq.most_common(5)}

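    # Security analysis is synchronous (ssl/socket/requests/whois rather than
    # httpx). _check_dns is referenced below but not defined in this section; it
    # is assumed to be implemented elsewhere in the class.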
    def analyze_security(self, url):
        try:
            domain = urlparse(url).netloc
            whois_info = self._get_whois_info(domain)

            security_analysis = {
                "تحليل SSL": self._check_ssl(url),
                "تحليل DNS": self._check_dns(domain),
                "تحليل Headers": self._check_security_headers(url),
                "فحص المخاطر": self._check_security_risks(url),
                "معلومات Whois": whois_info,
                "تقييم الأمان": self._calculate_security_score(url),
                "توصيات الأمان": self._get_security_recommendations(url)
            }
            return security_analysis
        except Exception as e:
            return {"error": f"خطأ في تحليل الأمان: {str(e)}"}

    def _get_whois_info(self, domain):
        try:
            w = whois.whois(domain)
            return {
                "اسم النطاق": domain,
                "تاريخ التسجيل": str(w.creation_date),
                "تاريخ الانتهاء": str(w.expiration_date),
                "المسجل": w.registrar,
                "الحالة": w.status
            }
        except Exception:
            return {"error": "لا يمكن الحصول على معلومات Whois"}

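    # _check_ssl opens a TLS connection to port 443 and reads the peer
    # certificate. getpeercert() returns the subject as a tuple of RDN tuples,
    # so it is converted to a dict before the commonName lookup.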
    def _check_ssl(self, url):
        try:
            hostname = urlparse(url).netloc
            context = ssl.create_default_context()
            with socket.create_connection((hostname, 443), timeout=10) as sock:
                with context.wrap_socket(sock, server_hostname=hostname) as ssock:
                    cert = ssock.getpeercert()
                    # The certificate subject is a tuple of RDN tuples, not a dict.
                    subject = dict(item[0] for item in cert.get('subject', ()))
                    return {
                        "الحالة": "آمن ✅",
                        "نوع الشهادة": subject.get('commonName', 'Unknown'),
                        "تاريخ الإصدار": cert.get('notBefore', 'Unknown'),
                        "تاريخ الانتهاء": cert.get('notAfter', 'Unknown'),
                        "الخوارزمية": ssock.cipher()[0],
                        "قوة التشفير": f"{ssock.cipher()[2]} bits"
                    }
        except Exception:
            return {
                "الحالة": "غير آمن ❌",
                "السبب": "لا يوجد شهادة SSL صالحة"
            }

    def _check_security_headers(self, url):
        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            headers = response.headers
            security_headers = {
                'Strict-Transport-Security': 'HSTS',
                'Content-Security-Policy': 'CSP',
                'X-Frame-Options': 'X-Frame',
                'X-Content-Type-Options': 'X-Content-Type',
                'X-XSS-Protection': 'XSS Protection'
            }

            results = {}
            for header, name in security_headers.items():
                results[name] = {
                    "موجود": header in headers,
                    "القيمة": headers.get(header, "غير موجود")
                }
            return results
        except Exception:
            return {"error": "فشل فحص headers الأمان"}

    def _check_security_risks(self, url):
        risks = []

        if not url.startswith('https'):
            risks.append({
                "المستوى": "عالي",
                "النوع": "بروتوكول غير آمن",
                "الوصف": "الموقع يستخدم HTTP بدلاً من HTTPS"
            })

        ssl_info = self._check_ssl(url)
        if ssl_info.get("الحالة") == "غير آمن ❌":
            risks.append({
                "المستوى": "عالي",
                "النوع": "شهادة SSL",
                "الوصف": "شهادة SSL غير صالحة أو منتهية"
            })

        headers = self._check_security_headers(url)
        # Only flag missing HSTS if the header check itself succeeded.
        if isinstance(headers, dict) and "error" not in headers and not headers.get("HSTS", {}).get("موجود"):
            risks.append({
                "المستوى": "متوسط",
                "النوع": "HSTS غير مفعل",
                "الوصف": "عدم وجود حماية النقل الآمن الصارم"
            })

        return {
            "المخاطر المكتشفة": risks,
            "عدد المخاطر": len(risks),
            "مستوى الخطورة": "عالي" if any(r["المستوى"] == "عالي" for r in risks) else "متوسط" if risks else "منخفض"
        }

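    # Security score: start at 100, subtract 30 for plain HTTP, 25 for a failed
    # SSL check, 5 per missing security header, and 10 per detected risk, then
    # clamp at 0. Note that missing headers and the risks they trigger are both
    # counted, so a single issue can be penalised twice.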
    def _calculate_security_score(self, url):
        score = 100

        if not url.startswith('https'):
            score -= 30

        ssl_info = self._check_ssl(url)
        if ssl_info.get("الحالة") == "غير آمن ❌":
            score -= 25

        headers = self._check_security_headers(url)
        # Skip header deductions if the header check itself failed.
        if isinstance(headers, dict) and "error" not in headers:
            for header_info in headers.values():
                if not header_info.get("موجود"):
                    score -= 5

        risks = self._check_security_risks(url)
        score -= (risks.get("عدد المخاطر", 0) * 10)

        return max(0, score)

    def _get_security_recommendations(self, url):
        recommendations = []

        if not url.startswith('https'):
            recommendations.append({
                "المشكلة": "عدم استخدام HTTPS",
                "الحل": "قم بتفعيل HTTPS وتثبيت شهادة SSL",
                "الأولوية": "عالية"
            })

        ssl_info = self._check_ssl(url)
        if ssl_info.get("الحالة") == "غير آمن ❌":
            recommendations.append({
                "المشكلة": "شهادة SSL غير صالحة",
                "الحل": "قم بتجديد أو تثبيت شهادة SSL جديدة",
                "الأولوية": "عالية"
            })

        headers = self._check_security_headers(url)
        # Only recommend individual headers when the header check succeeded.
        if isinstance(headers, dict) and "error" not in headers:
            for name, info in headers.items():
                if not info.get("موجود"):
                    recommendations.append({
                        "المشكلة": f"عدم وجود {name}",
                        "الحل": f"قم بإضافة header الأمان {name}",
                        "الأولوية": "متوسطة"
                    })

        return recommendations if recommendations else [
            {
                "المشكلة": "لا توجد مشاكل أمنية واضحة",
                "الحل": "استمر في مراقبة وتحديث إعدادات الأمان",
                "الأولوية": "منخفضة"
            }
        ]