import streamlit as st
from streamlit_option_menu import option_menu
import requests
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import httpx
import asyncio
import aiohttp
from bs4 import BeautifulSoup
import whois
import ssl
import socket
import dns.resolver
from urllib.parse import urlparse, urljoin
import json
import numpy as np
from PIL import Image
import io
import time
import matplotlib.pyplot as plt
import seaborn as sns
import tldextract
from concurrent.futures import ThreadPoolExecutor
import re
from collections import Counter
from wordcloud import WordCloud
import advertools as adv


st.set_page_config(
    layout="wide",
    page_title="محلل المواقع المتقدم | Website Analyzer Pro",
    page_icon="🔍",
    initial_sidebar_state="expanded"
)

st.markdown("""
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Tajawal:wght@400;500;700&display=swap');

    * {
        font-family: 'Tajawal', sans-serif;
    }

    .main {
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        padding: 20px;
    }

    .metric-card {
        background: white;
        border-radius: 15px;
        padding: 20px;
        box-shadow: 0 4px 15px rgba(0,0,0,0.1);
        transition: all 0.3s ease;
        margin-bottom: 20px;
    }

    .metric-card:hover {
        transform: translateY(-5px);
        box-shadow: 0 8px 25px rgba(0,0,0,0.15);
    }

    .metric-value {
        font-size: 2em;
        font-weight: bold;
        color: #2196F3;
    }

    .metric-label {
        color: #666;
        font-size: 1.1em;
    }

    .stButton>button {
        background: linear-gradient(45deg, #2196F3, #21CBF3);
        color: white;
        border-radius: 25px;
        padding: 15px 30px;
        border: none;
        box-shadow: 0 4px 15px rgba(33,150,243,0.3);
        transition: all 0.3s ease;
        font-size: 1.1em;
        font-weight: 500;
        width: 100%;
    }

    .stButton>button:hover {
        transform: translateY(-2px);
        box-shadow: 0 6px 20px rgba(33,150,243,0.4);
    }

    h1, h2, h3 {
        color: #1E3D59;
        font-weight: 700;
    }

    .stTextInput>div>div>input {
        border-radius: 10px;
        border: 2px solid #E0E0E0;
        padding: 12px;
        font-size: 1.1em;
        transition: all 0.3s ease;
    }

    .stTextInput>div>div>input:focus {
        border-color: #2196F3;
        box-shadow: 0 0 0 2px rgba(33,150,243,0.2);
    }

    .streamlit-expanderHeader {
        background-color: white;
        border-radius: 10px;
        padding: 10px;
        box-shadow: 0 2px 8px rgba(0,0,0,0.1);
    }

    .stProgress > div > div > div {
        background-color: #2196F3;
    }

    .tab-content {
        padding: 20px;
        background: white;
        border-radius: 15px;
        box-shadow: 0 4px 15px rgba(0,0,0,0.1);
    }

    .insight-card {
        background: #f8f9fa;
        border-right: 4px solid #2196F3;
        padding: 15px;
        margin: 10px 0;
        border-radius: 8px;
    }

    .chart-container {
        background: white;
        padding: 20px;
        border-radius: 15px;
        box-shadow: 0 4px 15px rgba(0,0,0,0.1);
        margin: 20px 0;
    }
    </style>
""", unsafe_allow_html=True)


class AdvancedWebsiteAnalyzer:
    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        self.history = self.load_history()

    def load_history(self):
        # Load previous analysis runs; start fresh if the file is missing or empty.
        try:
            return pd.read_csv('analysis_history.csv')
        except (FileNotFoundError, pd.errors.EmptyDataError):
            return pd.DataFrame(columns=['url', 'timestamp', 'performance_score', 'seo_score', 'security_score'])

    def save_history(self, data):
        # Append one result row and persist the full history to disk.
        self.history = pd.concat([self.history, pd.DataFrame([data])], ignore_index=True)
        self.history.to_csv('analysis_history.csv', index=False)

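    # A record passed to save_history() is expected to match the columns created
    # by load_history(). A minimal sketch (the values are illustrative, not taken
    # from a real run):
    #
    #   analyzer = AdvancedWebsiteAnalyzer()
    #   analyzer.save_history({
    #       'url': 'https://example.com',
    #       'timestamp': datetime.now().isoformat(),
    #       'performance_score': 85,
    #       'seo_score': 78,
    #       'security_score': 90,
    #   })
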
    async def analyze_performance(self, url):
        try:
            start_time = time.time()
            # httpx does not follow redirects by default, so enable it explicitly
            # and send the analyzer's User-Agent header.
            async with httpx.AsyncClient(follow_redirects=True) as client:
                response = await client.get(url, headers=self.headers)
                load_time = time.time() - start_time
                page_size = len(response.content) / 1024  # KB

                soup = BeautifulSoup(response.text, 'html.parser')
                images = soup.find_all('img')
                scripts = soup.find_all('script')
                css_files = soup.find_all('link', {'rel': 'stylesheet'})

                performance_metrics = {
                    "زمن التحميل": round(load_time, 2),
                    "حجم الصفحة": round(page_size, 2),
                    "حالة الاستجابة": response.status_code,
                    "عدد الصور": len(images),
                    "عدد ملفات JavaScript": len(scripts),
                    "عدد ملفات CSS": len(css_files),
                    "تقييم الأداء": self._calculate_performance_score(load_time, page_size, len(images), len(scripts)),
                    "توصيات التحسين": self._get_performance_recommendations(load_time, page_size, len(images), len(scripts))
                }

            resources_analysis = await self._analyze_resources(url)
            performance_metrics.update(resources_analysis)

            return performance_metrics
        except Exception as e:
            return {"error": f"خطأ في تحليل الأداء: {str(e)}"}

    async def _analyze_resources(self, url):
        try:
            async with httpx.AsyncClient(follow_redirects=True) as client:
                response = await client.get(url, headers=self.headers)
                soup = BeautifulSoup(response.text, 'html.parser')

                # Sample only the first five images to keep the analysis fast.
                # Fetching must stay inside the `async with` block so the client
                # is still open.
                images = soup.find_all('img')
                image_sizes = []
                for img in images[:5]:
                    if img.get('src'):
                        try:
                            # Resolve relative srcs against the page URL before fetching.
                            img_response = await client.get(urljoin(url, img['src']))
                            image_sizes.append(len(img_response.content) / 1024)
                        except httpx.HTTPError:
                            continue

                return {
                    "تحليل الموارد": {
                        "متوسط حجم الصور": round(np.mean(image_sizes), 2) if image_sizes else 0,
                        "عدد الموارد الخارجية": len(soup.find_all(['script', 'link', 'img'])),
                        "توصيات تحسين الموارد": self._get_resource_recommendations(image_sizes)
                    }
                }
        except Exception as e:
            return {"error": f"خطأ في تحليل الموارد: {str(e)}"}

    def _get_resource_recommendations(self, image_sizes):
        recommendations = []

        if image_sizes:
            avg_size = np.mean(image_sizes)
            if avg_size > 100:
                recommendations.append({
                    "المشكلة": "حجم الصور كبير",
                    "الحل": "ضغط الصور وتحسين جودتها",
                    "الأولوية": "عالية"
                })

        return recommendations if recommendations else [
            {
                "المشكلة": "لا توجد مشاكل",
                "الحل": "الموارد محسنة بشكل جيد",
                "الأولوية": "منخفضة"
            }
        ]

    def _calculate_performance_score(self, load_time, page_size, image_count, script_count):
        # Start from a perfect score and subtract a capped penalty per factor.
        score = 100

        if load_time > 2:
            score -= min(30, (load_time - 2) * 10)      # 10 points per extra second, max 30

        if page_size > 1000:
            score -= min(20, (page_size - 1000) / 100)  # 1 point per extra 100 KB, max 20

        if image_count > 10:
            score -= min(15, (image_count - 10) * 1.5)  # 1.5 points per extra image, max 15

        if script_count > 5:
            score -= min(15, (script_count - 5) * 2)    # 2 points per extra script, max 15

        return max(0, round(score))

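    # A worked example of the scoring above (illustrative numbers, not a real
    # measurement): load_time=3.5s, page_size=1500 KB, 14 images, 8 scripts:
    #   load time:  min(30, 1.5 * 10)  = 15
    #   page size:  min(20, 500 / 100) = 5
    #   images:     min(15, 4 * 1.5)   = 6
    #   scripts:    min(15, 3 * 2)     = 6
    #   score = 100 - 15 - 5 - 6 - 6 = 68
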
    def _get_performance_recommendations(self, load_time, page_size, image_count, script_count):
        recommendations = []

        if load_time > 2:
            recommendations.append({
                "المشكلة": "بطء زمن التحميل",
                "الحل": "تحسين سرعة الخادم وتفعيل التخزين المؤقت",
                "الأولوية": "عالية"
            })

        if page_size > 1000:
            recommendations.append({
                "المشكلة": "حجم الصفحة كبير",
                "الحل": "ضغط الملفات وتحسين الكود",
                "الأولوية": "متوسطة"
            })

        if image_count > 10:
            recommendations.append({
                "المشكلة": "عدد كبير من الصور",
                "الحل": "تحسين حجم الصور واستخدام التحميل الكسول",
                "الأولوية": "متوسطة"
            })

        if script_count > 5:
            recommendations.append({
                "المشكلة": "عدد كبير من ملفات JavaScript",
                "الحل": "دمج وضغط ملفات JavaScript",
                "الأولوية": "عالية"
            })

        return recommendations if recommendations else [{"المشكلة": "لا توجد مشاكل", "الحل": "الأداء جيد!", "الأولوية": "منخفضة"}]

    async def analyze_seo(self, url):
        try:
            async with httpx.AsyncClient(follow_redirects=True) as client:
                response = await client.get(url, headers=self.headers)
                soup = BeautifulSoup(response.text, 'html.parser')

                content_analysis = self._analyze_content(soup)
                # Pass the page URL so links can be classified against its domain.
                links_analysis = self._analyze_links(soup, url)
                keywords_analysis = self._extract_keywords(soup)

                seo_analysis = {
                    "تحليل العنوان": self._analyze_title(soup),
                    "تحليل الوصف": self._analyze_description(soup),
                    "تحليل الكلمات المفتاحية": keywords_analysis,
                    "تحليل العناوين": self._analyze_headings(soup),
                    "تحليل الروابط": links_analysis,
                    "تحليل المحتوى": content_analysis,
                    "تقييم SEO": self._calculate_seo_score(soup),
                    "توصيات تحسين SEO": self._get_seo_recommendations(soup)
                }

                return seo_analysis
        except Exception as e:
            return {"error": f"خطأ في تحليل SEO: {str(e)}"}

    def _analyze_title(self, soup):
        title = soup.find('title')
        title_text = title.text if title else ""
        return {
            "العنوان": title_text,
            "الطول": len(title_text),
            "التقييم": "جيد" if 30 <= len(title_text) <= 60 else "يحتاج تحسين"
        }

    def _analyze_description(self, soup):
        meta_desc = soup.find('meta', {'name': 'description'})
        desc_text = meta_desc.get('content', '') if meta_desc else ""
        return {
            "الوصف": desc_text,
            "الطول": len(desc_text),
            "التقييم": "جيد" if 120 <= len(desc_text) <= 160 else "يحتاج تحسين"
        }

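    # The tag inspected above has this shape (a sketch of typical page markup,
    # not output from this tool):
    #
    #   <meta name="description" content="A short summary of the page...">
    #
    # The 120-160 character window mirrors the common guideline for how much of
    # a description search engines tend to display.
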
    def _analyze_headings(self, soup):
        headings = {}
        for i in range(1, 7):
            h_tags = soup.find_all(f'h{i}')
            headings[f'h{i}'] = {
                "العدد": len(h_tags),
                "النصوص": [h.text.strip() for h in h_tags]
            }
        return headings

    def _analyze_links(self, soup, base_url):
        links = soup.find_all('a')
        internal_links = []
        external_links = []
        broken_links = []
        base_netloc = urlparse(base_url).netloc

        for link in links:
            href = link.get('href', '')
            if not href or href.startswith('#'):
                continue
            # Resolve relative hrefs so they can be classified and fetched.
            absolute = urljoin(base_url, href)
            if urlparse(absolute).netloc == base_netloc:
                internal_links.append(href)
            else:
                external_links.append(href)

            try:
                response = requests.head(absolute, timeout=5, allow_redirects=True)
                if response.status_code >= 400:
                    broken_links.append(href)
            except requests.RequestException:
                broken_links.append(href)

        return {
            "عدد الروابط الداخلية": len(internal_links),
            "عدد الروابط الخارجية": len(external_links),
            "عدد الروابط المكسورة": len(broken_links),
            "الروابط المكسورة": broken_links
        }

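    # A classification sketch with base_url = "https://example.com/blog"
    # (hypothetical hrefs, for illustration only):
    #   href="/about"                 -> https://example.com/about      -> internal
    #   href="post.html"              -> https://example.com/post.html  -> internal
    #   href="https://other.com/x"    -> different netloc               -> external
    #   href="#top" or href=""        -> skipped
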
    def _analyze_content(self, soup):
        """
        Analyzes webpage content for SEO factors
        """
        try:
            # Collect visible text from the main text-bearing elements.
            text_content = ' '.join([p.text.strip() for p in soup.find_all(['p', 'div', 'article', 'section'])])

            headings = {f'h{i}': len(soup.find_all(f'h{i}')) for i in range(1, 7)}

            words = text_content.split()
            word_count = len(words)

            readability_score = self._calculate_readability(text_content)
            keyword_density = self._calculate_keyword_density(text_content)

            # Alt-text coverage: how many images carry a descriptive alt attribute.
            images = soup.find_all('img')
            images_with_alt = len([img for img in images if img.get('alt')])

            quality_score = self._calculate_content_quality_score(
                word_count,
                readability_score,
                images_with_alt,
                len(images),
                headings
            )

            return {
                "إحصائيات المحتوى": {
                    "عدد الكلمات": word_count,
                    "مستوى القراءة": readability_score,
                    "نسبة الصور مع نص بديل": f"{(images_with_alt / len(images) * 100 if images else 0):.1f}%",
                    "توزيع العناوين": headings,
                },
                "تحليل الكلمات المفتاحية": {
                    "كثافة الكلمات الرئيسية": keyword_density,
                    "الكلمات الأكثر تكراراً": self._get_top_words(text_content, 5)
                },
                "تقييم جودة المحتوى": {
                    "الدرجة": quality_score,
                    "التقييم": self._get_content_rating(quality_score),
                    "التوصيات": self._get_content_recommendations(
                        word_count,
                        readability_score,
                        images_with_alt,
                        len(images),
                        headings
                    )
                }
            }
        except Exception as e:
            return {"error": f"خطأ في تحليل المحتوى: {str(e)}"}

    def _calculate_content_quality_score(self, word_count, readability, alt_images, total_images, headings):
        """
        Calculates a content quality score based on various factors
        """
        score = 100

        # Thin content is penalized most heavily below 300 words.
        if word_count < 300:
            score -= 20
        elif word_count < 600:
            score -= 10

        # Readability is assumed on a 0-100 scale; lower means harder to read.
        if readability < 40:
            score -= 15
        elif readability < 60:
            score -= 10

        # Alt-text coverage ratio.
        if total_images > 0:
            alt_ratio = alt_images / total_images
            if alt_ratio < 0.5:
                score -= 15
            elif alt_ratio < 0.8:
                score -= 10

        # Heading structure: exactly one h1 and at least one h2 expected.
        if headings.get('h1', 0) == 0:
            score -= 10
        if headings.get('h1', 0) > 1:
            score -= 5
        if headings.get('h2', 0) == 0:
            score -= 5

        return max(0, score)

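    # A worked example (illustrative inputs): word_count=450, readability=55,
    # 3 of 6 images with alt text, headings={'h1': 1, 'h2': 2, ...}:
    #   words:       300 <= 450 < 600    -> -10
    #   readability: 40 <= 55 < 60       -> -10
    #   alt ratio:   3/6 = 0.5, < 0.8    -> -10
    #   headings:    one h1, h2 present  -> -0
    #   score = 100 - 30 = 70  ("جيد" per _get_content_rating below)
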
    def _get_content_rating(self, score):
        """
        Converts numerical score to qualitative rating
        """
        if score >= 90:
            return "ممتاز"
        elif score >= 80:
            return "جيد جداً"
        elif score >= 70:
            return "جيد"
        elif score >= 60:
            return "مقبول"
        else:
            return "يحتاج تحسين"

    def _get_content_recommendations(self, word_count, readability, alt_images, total_images, headings):
        """
        Generates content improvement recommendations
        """
        recommendations = []

        if word_count < 300:
            recommendations.append({
                "المشكلة": "محتوى قصير جداً",
                "الحل": "زيادة المحتوى إلى 300 كلمة على الأقل",
                "الأولوية": "عالية"
            })

        if readability < 60:
            recommendations.append({
                "المشكلة": "صعوبة قراءة المحتوى",
                "الحل": "تبسيط الجمل واستخدام لغة أسهل",
                "الأولوية": "متوسطة"
            })

        if total_images > 0 and (alt_images / total_images) < 0.8:
            recommendations.append({
                "المشكلة": "نقص في النصوص البديلة للصور",
                "الحل": "إضافة نص بديل وصفي لجميع الصور",
                "الأولوية": "عالية"
            })

        if headings.get('h1', 0) != 1:
            recommendations.append({
                "المشكلة": "عدد غير مناسب من عناوين H1",
                "الحل": "استخدام عنوان H1 واحد فقط للصفحة",
                "الأولوية": "عالية"
            })

        return recommendations if recommendations else [{
            "المشكلة": "لا توجد مشاكل واضحة",
            "الحل": "الاستمرار في تحديث المحتوى بشكل دوري",
            "الأولوية": "منخفضة"
        }]

    def _get_top_words(self, text, count=5):
        """
        Gets the most frequent meaningful words in the content
        """
        # A small mixed Arabic/English stop-word list; extend as needed.
        stop_words = set(['و', 'في', 'من', 'على', 'the', 'and', 'in', 'of', 'to'])
        words = text.lower().split()
        word_freq = Counter(word for word in words if word not in stop_words and len(word) > 2)

        # Use `freq` as the loop variable to avoid shadowing the `count` parameter.
        return {word: freq for word, freq in word_freq.most_common(count)}
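
    # A minimal driving sketch, assuming the UI code elsewhere in this file
    # calls the analyzer roughly like this (the URL and variable names are
    # illustrative, not part of the class):
    #
    #   analyzer = AdvancedWebsiteAnalyzer()
    #   perf = asyncio.run(analyzer.analyze_performance("https://example.com"))
    #   seo = asyncio.run(analyzer.analyze_seo("https://example.com"))
    #
    # Note: analyze_seo also relies on _extract_keywords, _calculate_seo_score,
    # _get_seo_recommendations, _calculate_readability and _calculate_keyword_density,
    # which are expected to be defined elsewhere in this class.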