# Standard library imports
import asyncio
import io
import json
import random
import re
import socket
import ssl
import time
from collections import Counter
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from urllib.parse import urlparse, urljoin
from urllib.robotparser import RobotFileParser

# Third-party imports
import aiohttp
import dns.resolver
import httpx
import networkx as nx
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import requests_html
import streamlit as st
import tldextract
import whois
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from streamlit_lottie import st_lottie
from streamlit_option_menu import option_menu
from textblob import TextBlob
from webdriver_manager.chrome import ChromeDriverManager

TIMEOUT = 10     # per-request timeout in seconds
MAX_RETRIES = 3  # attempts per fetch before giving up
COMMON_CRAWL_INDEX = 'https://index.commoncrawl.org/CC-MAIN-2023-50-index'

class WebsiteAnalyzer:
    def __init__(self):
        self.ua = UserAgent()
        self.session = requests.Session()
        self.cache = {}

    def _get_headers(self):
        """Build request headers with a randomized user agent."""
        return {
            'User-Agent': self.ua.random,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
        }

    async def _fetch_with_retry(self, url, retries=MAX_RETRIES):
        """Fetch a URL with retries, pausing briefly between attempts."""
        for attempt in range(retries):
            try:
                async with httpx.AsyncClient(timeout=TIMEOUT) as client:
                    response = await client.get(url, headers=self._get_headers())
                    response.raise_for_status()
                    return response
            except Exception:
                if attempt == retries - 1:
                    raise  # re-raise the last failure instead of swallowing it
                await asyncio.sleep(1)

    async def analyze_performance(self, url):
        """Measure DNS, TCP, TTFB, and download timings plus per-device load times."""
        try:
            performance_metrics = {
                'dns_lookup': [],
                'tcp_handshake': [],
                'ttfb': [],
                'content_download': []
            }

            # Simulate device classes by swapping the browser user agent.
            devices = ['desktop', 'mobile', 'tablet']
            device_metrics = {}

            for device in devices:
                chrome_options = Options()
                chrome_options.add_argument(f"--user-agent={self._get_device_user_agent(device)}")

                # Time the full page load per device.
                start_time = time.time()
                render_time = self._measure_render_time(url, chrome_options)
                device_metrics[device] = {
                    'load_time': time.time() - start_time,
                    'render_time': render_time
                }

            # Sample the network path three times to smooth out jitter.
            domain = urlparse(url).netloc
            port = 443 if urlparse(url).scheme == 'https' else 80
            for _ in range(3):
                start_time = time.time()

                dns_start = time.time()
                socket.gethostbyname(domain)
                performance_metrics['dns_lookup'].append(time.time() - dns_start)

                tcp_start = time.time()
                with socket.create_connection((domain, port), timeout=TIMEOUT):
                    performance_metrics['tcp_handshake'].append(time.time() - tcp_start)

                response = await self._fetch_with_retry(url)
                performance_metrics['ttfb'].append(response.elapsed.total_seconds())
                performance_metrics['content_download'].append(
                    time.time() - start_time - response.elapsed.total_seconds()
                )

            soup = BeautifulSoup(response.text, 'html.parser')
            resource_analysis = self._analyze_resources(soup, response.content)
            optimization_suggestions = self._generate_optimization_suggestions(resource_analysis)

            return {
                "site_performance": {
                    "device_analysis": {
                        device: {
                            "load_time": f"{metrics['load_time']:.2f} seconds",
                            "render_time": f"{metrics['render_time']:.2f} seconds"
                        } for device, metrics in device_metrics.items()
                    },
                    "network_timings_seconds": {
                        metric: round(sum(values) / len(values), 3)
                        for metric, values in performance_metrics.items() if values
                    },
                    "resource_analysis": resource_analysis,
                    "optimization_suggestions": optimization_suggestions
                }
            }
        except Exception as e:
            return {"error": f"Performance analysis failed: {e}"}

    def _analyze_resources(self, soup, content):
        """Analyze page resources and surface optimization opportunities."""
        return {
            'images': self._analyze_images(soup),
            'scripts': self._analyze_scripts(soup),
            'styles': self._analyze_styles(soup),
            'fonts': self._analyze_fonts(soup),
            'total_size_kb': len(content) / 1024
        }

    def _analyze_images(self, soup):
        """Detailed image audit: count, missing alt text, oversized images."""
        images = soup.find_all('img')
        return {
            'count': len(images),
            'without_alt': len([img for img in images if not img.get('alt')]),
            'large_images': len([img for img in images if self._is_large_image(img)]),
            'optimization_needed': self._check_image_optimization(images)
        }
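
    # The helpers below are referenced by the resource audit but missing from
    # this file; these are hedged sketches, with the 1000px size cutoff and
    # the lazy-loading check chosen as illustrative heuristics.
    def _is_large_image(self, img):
        """Treat images whose declared dimensions exceed 1000px as large."""
        try:
            return int(img.get('width') or 0) > 1000 or int(img.get('height') or 0) > 1000
        except (TypeError, ValueError):
            return False

    def _check_image_optimization(self, images):
        """Count images that skip lazy loading, a cheap proxy for remaining work."""
        return len([img for img in images if img.get('loading') != 'lazy'])

    def _analyze_scripts(self, soup):
        scripts = soup.find_all('script')
        return {'count': len(scripts), 'external': len([s for s in scripts if s.get('src')])}

    def _analyze_styles(self, soup):
        return {
            'inline': len(soup.find_all('style')),
            'external': len(soup.find_all('link', rel='stylesheet'))
        }

    def _analyze_fonts(self, soup):
        links = soup.find_all('link', href=True)
        return {'font_links': len([l for l in links if 'font' in l['href'].lower()])}

    def _generate_optimization_suggestions(self, resource_analysis):
        """Turn the resource audit into human-readable suggestions (sketch)."""
        suggestions = []
        if resource_analysis['images']['without_alt']:
            suggestions.append("Add alt text to all images")
        if resource_analysis['images']['large_images']:
            suggestions.append("Resize or compress oversized images")
        if resource_analysis['total_size_kb'] > 2048:
            suggestions.append("Page exceeds 2 MB; consider trimming resources")
        return suggestions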

    def _analyze_competitors(self, url):
        """Identify competitors and compare key metrics against them."""
        try:
            competitors = self._find_competitors(url)
            comparison = {}

            for competitor in competitors:
                comparison[competitor] = {
                    'traffic': self._estimate_traffic(competitor),
                    'keywords': self._analyze_keywords(competitor),
                    'backlinks': self._analyze_backlinks(competitor),
                    'social_presence': self._analyze_social_presence(competitor)
                }

            return {
                'main_competitors': comparison,
                'comparative_analysis': self._generate_competitive_analysis(comparison)
            }
        except Exception as e:
            return {"error": f"Competitor analysis failed: {e}"}

    def _analyze_content_quality(self, soup):
        """Assess content quality: readability, lexical diversity, keywords, sentiment."""
        text_content = soup.get_text()

        # TextBlob reports polarity (-1..1) and subjectivity (0..1).
        blob = TextBlob(text_content)

        readability = self._calculate_readability(text_content)
        keywords = self._extract_keywords(text_content)

        return {
            "content_analysis": {
                "readability": readability,
                "lexical_diversity": self._calculate_lexical_diversity(text_content),
                "top_keywords": keywords[:10],
                "sentiment": {
                    "polarity": blob.sentiment.polarity,
                    "subjectivity": blob.sentiment.subjectivity
                }
            }
        }
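
    # The three content helpers called above are not defined in this file.
    # These sketches use crude, stated heuristics: the readability score is a
    # Flesch-style formula with average word length standing in for syllable
    # counts, which is an approximation, not the real Flesch test.
    def _calculate_readability(self, text):
        sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
        words = re.findall(r'\w+', text)
        if not sentences or not words:
            return 0.0
        avg_sentence_len = len(words) / len(sentences)
        avg_word_len = sum(len(w) for w in words) / len(words)
        return round(206.835 - 1.015 * avg_sentence_len - 84.6 * (avg_word_len / 4.7), 1)

    def _calculate_lexical_diversity(self, text):
        """Type-token ratio: unique words divided by total words."""
        words = re.findall(r'\w+', text.lower())
        return round(len(set(words)) / len(words), 3) if words else 0.0

    def _extract_keywords(self, text):
        """Most frequent words of four or more letters, as a cheap keyword proxy."""
        words = re.findall(r'[a-zA-Z]{4,}', text.lower())
        return [word for word, _ in Counter(words).most_common(20)]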

    def _analyze_backlinks(self, url):
        """Analyze backlinks: volume, quality, and source diversity."""
        try:
            backlinks = self._fetch_backlinks(url)

            quality_metrics = self._analyze_backlink_quality(backlinks)
            diversity = self._analyze_source_diversity(backlinks)

            return {
                "backlink_analysis": {
                    "total_count": len(backlinks),
                    "link_quality": quality_metrics,
                    "source_diversity": diversity,
                    "top_referring_domains": self._get_top_referring_domains(backlinks)
                }
            }
        except Exception as e:
            return {"error": f"Backlink analysis failed: {e}"}

    def _analyze_social_signals(self, url):
        """Analyze social signals: share counts and engagement per platform."""
        try:
            social_metrics = {
                'facebook': self._get_facebook_shares(url),
                'twitter': self._get_twitter_shares(url),
                'linkedin': self._get_linkedin_shares(url),
                'pinterest': self._get_pinterest_shares(url)
            }

            engagement_analysis = self._analyze_social_engagement(social_metrics)

            return {
                "social_presence": {
                    "share_statistics": social_metrics,
                    "engagement_analysis": engagement_analysis,
                    "recommendations": self._generate_social_recommendations(engagement_analysis)
                }
            }
        except Exception as e:
            return {"error": f"Social signal analysis failed: {e}"}

    async def _generate_comprehensive_report(self, url):
        """Assemble every analysis into a single report dictionary."""
        try:
            # Fetch the page once and reuse the parsed document.
            html = self.session.get(url, headers=self._get_headers(), timeout=TIMEOUT).text
            report = {
                "performance": await self.analyze_performance(url),
                "seo": self.analyze_seo(url),
                "security": self.analyze_security(url),
                "competitors": self._analyze_competitors(url),
                "content": self._analyze_content_quality(BeautifulSoup(html, 'html.parser')),
                "backlinks": self._analyze_backlinks(url),
                "social": self._analyze_social_signals(url),
                "recommendations": self._generate_recommendations()
            }
            return report
        except Exception as e:
            return {"error": f"Failed to generate the comprehensive report: {e}"}

    def _generate_recommendations(self):
        """Build prioritized recommendations (population logic not in this excerpt)."""
        recommendations = {
            "urgent_improvements": [],
            "medium_priority_improvements": [],
            "long_term_improvements": []
        }
        return recommendations


async def main():
    analyzer = WebsiteAnalyzer()
    url = "https://example.com"
    report = await analyzer._generate_comprehensive_report(url)
    print(json.dumps(report, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    asyncio.run(main())