Spaces:

joermd
/

fpurl

Sleeping

App Files Files Community

joermd committed on Nov 18, 2024

Commit

7e9a98f

verified ·

1 Parent(s): a54100a

Update app.py

Browse files

Files changed (1) hide show

app.py +329 -145

app.py CHANGED Viewed

@@ -28,9 +28,8 @@ import re
 from collections import Counter
 from wordcloud import WordCloud
 import advertools as adv
-from collections import Counter
-# Page configuration
 st.set_page_config(
     layout="wide",
     page_title="محلل المواقع المتقدم | Website Analyzer Pro",
@@ -38,7 +37,7 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )
-# Custom CSS
 st.markdown("""
 <style>
     @import url('https://fonts.googleapis.com/css2?family=Tajawal:wght@400;500;700&display=swap');
@@ -156,7 +155,129 @@ class AdvancedWebsiteAnalyzer:
         }
         self.history = self.load_history()
-    # [Previous methods remain the same until analyze_seo]
     async def analyze_seo(self, url):
         try:
@@ -164,8 +285,13 @@ class AdvancedWebsiteAnalyzer:
                 response = await client.get(url)
                 soup = BeautifulSoup(response.text, 'html.parser')
                 content_analysis = self._analyze_content(soup)
                 links_analysis = self._analyze_links(soup)
                 keywords_analysis = self._extract_keywords(soup)
                 seo_analysis = {
@@ -183,155 +309,213 @@ class AdvancedWebsiteAnalyzer:
         except Exception as e:
             return {"error": f"خطأ في تحليل SEO: {str(e)}"}
-    def _extract_keywords(self, soup):
-        # Add implementation for keyword extraction
-        pass
-    def _calculate_seo_score(self, soup):
-        # Add implementation for SEO scoring
-        pass
-    def _get_seo_recommendations(self, soup):
-        # Add implementation for SEO recommendations
-        pass
     def _analyze_content(self, soup):
-        """
-        Analyzes webpage content for SEO factors
-        """
-        try:
-            text_content = ' '.join([p.text.strip() for p in soup.find_all(['p', 'div', 'article', 'section'])])
-            headings = {f'h{i}': len(soup.find_all(f'h{i}')) for i in range(1, 7)}
-            words = text_content.split()
-            word_count = len(words)
-            readability_score = self._calculate_readability(text_content)
-            keyword_density = self._calculate_keyword_density(text_content)
-            images = soup.find_all('img')
-            images_with_alt = len([img for img in images if img.get('alt')])
-            quality_score = self._calculate_content_quality_score(
-                word_count,
-                readability_score,
-                images_with_alt,
-                len(images),
-                headings
-            )
-            return {
-                "إحصائيات المحتوى": {
-                    "عدد الكلمات": word_count,
-                    "مستوى القراءة": readability_score,
-                    "نسبة الصور مع نص بديل": f"{(images_with_alt/len(images)*100 if images else 0):.1f}%",
-                    "توزيع العناوين": headings,
-                },
-                "تحليل الكلمات المفتاحية": {
-                    "كثافة الكلمات الرئيسية": keyword_density,
-                    "الكلمات الأكثر تكراراً": self._get_top_words(text_content, 5)
-                },
-                "تقييم جودة المحتوى": {
-                    "الدرجة": quality_score,
-                    "التقييم": self._get_content_rating(quality_score),
-                    "التوصيات": self._get_content_recommendations(
-                        word_count,
-                        readability_score,
-                        images_with_alt,
-                        len(images),
-                        headings
-                    )
-                }
-            }
-        except Exception as e:
-            return {"error": f"خطأ في تحليل المحتوى: {str(e)}"}
-    def _calculate_readability(self, text):
-        # Add implementation for readability calculation
-        pass
-    def _calculate_keyword_density(self, text):
-        # Add implementation for keyword density calculation
-        pass
-    def _calculate_content_quality_score(self, word_count, readability, alt_images, total_images, headings):
-        score = 100
-        if word_count < 300:
-            score -= 20
-        elif word_count < 600:
-            score -= 10
-        if readability < 40:
-            score -= 15
-        elif readability < 60:
-            score -= 10
-        if total_images > 0:
-            alt_ratio = alt_images / total_images
-            if alt_ratio < 0.5:
-                score -= 15
-            elif alt_ratio < 0.8:
-                score -= 10
-        if headings.get('h1', 0) == 0:
-            score -= 10
-        if headings.get('h1', 0) > 1:
-            score -= 5
-        if headings.get('h2', 0) == 0:
-            score -= 5
-        return max(0, score)
-    def _get_content_rating(self, score):
-        if score >= 90:
-            return "ممتاز"
-        elif score >= 80:
-            return "جيد جداً"
-        elif score >= 70:
-            return "جيد"
-        elif score >= 60:
-            return "مقبول"
-        else:
-            return "يحتاج تحسين"
-    def _get_content_recommendations(self, word_count, readability, alt_images, total_images, headings):
-        recommendations = []
-        if word_count < 300:
-            recommendations.append({
-                "المشكلة": "محتوى قصير جداً",
-                "الحل": "زيادة المحتوى إلى 300 كلمة على الأقل",
-                "الأولوية": "عالية"
-            })
-        if readability < 60:
-            recommendations.append({
-                "المشكلة": "صعوبة قراءة المحتوى",
-                "الحل": "تبسيط الجمل واستخدام لغة أسهل",
-                "الأولوية": "متوسطة"
-            })
-        if total_images > 0 and (alt_images / total_images) < 0.8:
-            recommendations.append({
-                "المشكلة": "نقص في النصوص البديلة للصور",
-                "الحل": "إضافة نص بديل وصفي لجميع الصور",
-                "الأولوية": "عالية"
-            })
-        if headings.get('h1', 0) != 1:
-            recommendations.append({
-                "المشكلة": "عدد غير مناسب من عناوين H1",
-                "الحل": "استخدام عنوان H1 واحد فقط للصفحة",
-                "الأولوية": "عالية"
-            })
-        return recommendations if recommendations else [{
-            "المشكلة": "لا توجد مشاكل واضحة",
-            "الحل": "الاستمرار في تحديث المحتوى بشكل دوري",
-            "الأولوية": "منخفضة"
-        }]
-    def _get_top_words(self, text, count=5):
-        stop_words = set(['و', 'في', 'من', 'على', 'the', 'and', 'in', 'of', 'to'])
-        words = text.lower().split()
-        word_freq = Counter(word for word in words if word not in stop_words and len(word) > 2)
-        return {word: count for word, count in word_freq.most_common(count)}

 from collections import Counter
 from wordcloud import WordCloud
 import advertools as adv
+# تحسين مظهر الصفحة
 st.set_page_config(
     layout="wide",
     page_title="محلل المواقع المتقدم | Website Analyzer Pro",
     initial_sidebar_state="expanded"
 )
+# تحسين التصميم باستخدام CSS المحسن
 st.markdown("""
 <style>
     @import url('https://fonts.googleapis.com/css2?family=Tajawal:wght@400;500;700&display=swap');
         }
         self.history = self.load_history()
+    def load_history(self):
+        """Load the persisted analysis history from ``analysis_history.csv``.
+
+        Returns:
+            The DataFrame read from the CSV file, or an empty DataFrame with
+            the columns ``url``, ``timestamp``, ``performance_score``,
+            ``seo_score`` and ``security_score`` when the file cannot be read.
+        """
+        try:
+            return pd.read_csv('analysis_history.csv')
+        except Exception:
+            # Best effort: an unreadable file is treated as "no history yet".
+            # Catching Exception instead of using a bare except lets
+            # KeyboardInterrupt and SystemExit propagate.
+            return pd.DataFrame(columns=['url', 'timestamp', 'performance_score', 'seo_score', 'security_score'])
+    def save_history(self, data):
+        """Append ``data`` to the in-memory history and rewrite the CSV file.
+
+        Args:
+            data: The new row(s): a dict (one row), a ``pd.Series`` (one
+                row), a ``pd.DataFrame``, or a list of row dicts.
+        """
+        # DataFrame.append() was removed in pandas 2.0, so the new row(s) are
+        # wrapped in a frame and concatenated instead.
+        if isinstance(data, pd.DataFrame):
+            new_rows = data
+        elif isinstance(data, pd.Series):
+            new_rows = data.to_frame().T
+        elif isinstance(data, dict):
+            new_rows = pd.DataFrame([data])
+        else:
+            new_rows = pd.DataFrame(data)
+        self.history = pd.concat([self.history, new_rows], ignore_index=True)
+        self.history.to_csv('analysis_history.csv', index=False)
+    async def analyze_performance(self, url):
+        """Fetch ``url`` and build a report of basic performance metrics.
+
+        Args:
+            url: Address of the page to download and measure.
+
+        Returns:
+            A dict holding the load time in seconds, the page size in KB, the
+            HTTP status code, the number of ``<img>``, ``<script>`` and
+            stylesheet ``<link>`` tags, a performance score, a list of
+            recommendations, and whatever keys ``_analyze_resources``
+            returns. If any step raises, a dict with a single ``"error"``
+            message is returned instead.
+        """
+        try:
+            # perf_counter() is monotonic, so the measurement cannot be skewed
+            # by system clock adjustments the way time.time() can.
+            start_time = time.perf_counter()
+            async with httpx.AsyncClient() as client:
+                response = await client.get(url)
+                load_time = time.perf_counter() - start_time
+                page_size = len(response.content) / 1024  # bytes -> KB
+                # Count the images and other resources referenced by the page
+                soup = BeautifulSoup(response.text, 'html.parser')
+                images = soup.find_all('img')
+                scripts = soup.find_all('script')
+                css_files = soup.find_all('link', {'rel': 'stylesheet'})
+                performance_metrics = {
+                    "زمن التحميل": round(load_time, 2),
+                    "حجم الصفحة": round(page_size, 2),
+                    "حالة الاستجابة": response.status_code,
+                    "عدد الصور": len(images),
+                    "عدد ملفات JavaScript": len(scripts),
+                    "عدد ملفات CSS": len(css_files),
+                    "تقييم الأداء": self._calculate_performance_score(load_time, page_size, len(images), len(scripts)),
+                    "توصيات التحسين": self._get_performance_recommendations(load_time, page_size, len(images), len(scripts))
+                }
+                # Merge in the resource analysis.
+                # NOTE(review): if _analyze_resources fails, its "error" key is
+                # merged into the metrics as well - confirm callers expect that.
+                resources_analysis = await self._analyze_resources(url)
+                performance_metrics.update(resources_analysis)
+                return performance_metrics
+        except Exception as e:
+            return {"error": f"خطأ في تحليل الأداء: {str(e)}"}
+    async def _analyze_resources(self, url):
+        """Download ``url`` and sample the size of the images it references.
+
+        Only the first five ``<img>`` tags are fetched to keep the analysis
+        fast. Images that cannot be downloaded are skipped.
+
+        Args:
+            url: Address of the page whose resources are inspected.
+
+        Returns:
+            A dict with one key whose value holds the average sampled image
+            size in KB (0 when nothing was sampled), the number of
+            ``<script>``, ``<link>`` and ``<img>`` tags, and the output of
+            ``_get_resource_recommendations``. If the page itself cannot be
+            analysed, a dict with a single ``"error"`` message is returned.
+        """
+        # Imported locally so this method does not rely on the module's
+        # import block providing urljoin.
+        from urllib.parse import urljoin
+
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get(url)
+                soup = BeautifulSoup(response.text, 'html.parser')
+                # Sample image sizes
+                images = soup.find_all('img')
+                image_sizes = []
+                for img in images[:5]:  # only the first 5 images, to avoid slowness
+                    src = img.get('src')
+                    if not src:
+                        continue
+                    try:
+                        # src may be relative to the page; resolve it against
+                        # the page URL so the request has a scheme and host.
+                        img_response = await client.get(urljoin(url, src))
+                        image_sizes.append(len(img_response.content) / 1024)
+                    except Exception:
+                        # Best effort: skip images that fail to download. A
+                        # bare except would also swallow task cancellation.
+                        continue
+                return {
+                    "تحليل الموارد": {
+                        "متوسط حجم الصور": round(np.mean(image_sizes), 2) if image_sizes else 0,
+                        "عدد الموارد الخارجية": len(soup.find_all(['script', 'link', 'img'])),
+                        "توصيات تحسين الموارد": self._get_resource_recommendations(image_sizes)
+                    }
+                }
+        except Exception as e:
+            return {"error": f"خطأ في تحليل الموارد: {str(e)}"}
+    def _calculate_performance_score(self, load_time, page_size, image_count, script_count):
+        """Score page performance on a scale from 0 to 100.
+
+        The score starts at 100 and loses points for every metric that
+        exceeds its threshold; each deduction grows with the excess and is
+        capped.
+
+        Args:
+            load_time: Page load time (compared against 2).
+            page_size: Page size (compared against 1000).
+            image_count: Number of images (compared against 10).
+            script_count: Number of scripts (compared against 5).
+
+        Returns:
+            The rounded score, never below 0.
+        """
+        # One deduction per metric: zero while the metric is within its
+        # threshold, otherwise proportional to the excess and capped.
+        deductions = (
+            min(30, (load_time - 2) * 10) if load_time > 2 else 0,
+            min(20, (page_size - 1000) / 100) if page_size > 1000 else 0,
+            min(15, (image_count - 10) * 1.5) if image_count > 10 else 0,
+            min(15, (script_count - 5) * 2) if script_count > 5 else 0,
+        )
+        remaining = 100
+        for deduction in deductions:
+            remaining -= deduction
+        return max(0, round(remaining))
+    def _get_performance_recommendations(self, load_time, page_size, image_count, script_count):
+        """List performance recommendations for the metrics over threshold.
+
+        Args:
+            load_time: Page load time; flagged when above 2.
+            page_size: Page size; flagged when above 1000.
+            image_count: Number of images; flagged when above 10.
+            script_count: Number of scripts; flagged when above 5.
+
+        Returns:
+            A list of dicts, each with a problem, a solution and a priority.
+            When no metric is flagged, a single "no problems" entry is
+            returned.
+        """
+        # (flagged?, problem, solution, priority), in reporting order
+        checks = (
+            (load_time > 2, "بطء زمن التحميل", "تحسين سرعة الخادم وتفعيل التخزين المؤقت", "عالية"),
+            (page_size > 1000, "حجم الصفحة كبير", "ضغط الملفات وتحسين الكود", "متوسطة"),
+            (image_count > 10, "عدد كبير من الصور", "تحسين حجم الصور واستخدام التحميل الكسول", "متوسطة"),
+            (script_count > 5, "عدد كبير من ملفات JavaScript", "دمج وضغط ملفات JavaScript", "عالية"),
+        )
+        findings = [
+            {"المشكلة": problem, "الحل": solution, "الأولوية": priority}
+            for flagged, problem, solution, priority in checks
+            if flagged
+        ]
+        if findings:
+            return findings
+        return [{"المشكلة": "لا توجد مشاكل", "الحل": "الأداء جيد!", "الأولوية": "منخفضة"}]
     async def analyze_seo(self, url):
         try:
                 response = await client.get(url)
                 soup = BeautifulSoup(response.text, 'html.parser')
+                # تحليل المحتوى
                 content_analysis = self._analyze_content(soup)
+                # تحليل الروابط
                 links_analysis = self._analyze_links(soup)
+                # تحليل الكلمات المفتاحية
                 keywords_analysis = self._extract_keywords(soup)
                 seo_analysis = {
         except Exception as e:
             return {"error": f"خطأ في تحليل SEO: {str(e)}"}
     def _analyze_content(self, soup):
+        """Summarise the text found in the page's ``<p>`` elements.
+
+        Args:
+            soup: Parsed page; only ``soup.find_all('p')`` and the ``text``
+                of each returned element are used.
+
+        Returns:
+            A dict with the word count, the readability score, the keyword
+            density and a rating. The rating is the "excellent" label only
+            when there are more than 300 words and the readability score is
+            above 60.
+        """
+        # Join the text of every paragraph into a single string
+        paragraph_texts = (paragraph.text for paragraph in soup.find_all('p'))
+        body_text = ' '.join(paragraph_texts)
+        total_words = len(body_text.split())
+        readability = self._calculate_readability(body_text)
+        density = self._calculate_keyword_density(body_text)
+        if total_words > 300 and readability > 60:
+            rating = "ممتاز"
+        else:
+            rating = "يحتاج تحسين"
+        return {
+            "عدد الكلمات": total_words,
+            "مستوى القراءة": readability,
+            "كثافة الكلمات المفتاحية": density,
+            "التقييم": rating
+        }
+    def _calculate_readability(self, text):
+        """Return a simple readability indicator between 0 and 100.
+
+        The indicator is the average number of words per sentence multiplied
+        by 10 and capped at 100.
+
+        Args:
+            text: The text to evaluate.
+
+        Returns:
+            The rounded indicator, or 0 when the text contains no sentence.
+        """
+        # re.split() always yields at least one piece, plus an empty piece
+        # after a trailing terminator, so only non-blank fragments count as
+        # sentences. The Arabic question mark is a terminator as well.
+        fragments = re.split(r'[.!?؟]+', text)
+        sentences = sum(1 for fragment in fragments if fragment.strip())
+        words = len(text.split())
+        if sentences == 0:
+            return 0
+        return min(100, round((words / sentences) * 10))
+    def _calculate_keyword_density(self, text):
+        """Return the share of the text taken by its five most common words.
+
+        Words are the whitespace-separated tokens of the lower-cased text.
+
+        Args:
+            text: The text to analyse.
+
+        Returns:
+            A dict mapping each of the (up to) five most frequent words to
+            its percentage of all words, rounded to two decimals. Empty when
+            the text has no words.
+        """
+        tokens = text.lower().split()
+        if not tokens:
+            return {}
+        token_total = len(tokens)
+        density = {}
+        for term, occurrences in Counter(tokens).most_common(5):
+            density[term] = round((occurrences / token_total) * 100, 2)
+        return density
+    def analyze_security(self, url):
+        """Run every security check for ``url`` and collect the results.
+
+        Args:
+            url: Address of the site to inspect.
+
+        Returns:
+            A dict with the SSL, DNS, header, risk and Whois results plus a
+            security score and recommendations. If any step raises, a dict
+            with a single ``"error"`` message is returned instead.
+        """
+        try:
+            domain = urlparse(url).netloc
+            whois_info = self._get_whois_info(domain)
+            # Run the network-bound checks once and hand the results to the
+            # helpers below instead of letting every helper repeat them.
+            ssl_info = self._check_ssl(url)
+            headers = self._check_security_headers(url)
+            security_analysis = {
+                "تحليل SSL": ssl_info,
+                # _check_dns is not defined in this part of the file.
+                "تحليل DNS": self._check_dns(domain),
+                "تحليل Headers": headers,
+                "فحص المخاطر": self._check_security_risks(url, ssl_info, headers),
+                "معلومات Whois": whois_info,
+                "تقييم الأمان": self._calculate_security_score(url, ssl_info, headers),
+                "توصيات الأمان": self._get_security_recommendations(url, ssl_info, headers)
+            }
+            return security_analysis
+        except Exception as e:
+            return {"error": f"خطأ في تحليل الأمان: {str(e)}"}
+
+    def _get_whois_info(self, domain):
+        """Look up the Whois record of ``domain``.
+
+        Returns:
+            A dict with the domain name, creation date, expiration date,
+            registrar and status, or a dict with an ``"error"`` message when
+            the lookup fails.
+        """
+        try:
+            w = whois.whois(domain)
+            return {
+                "اسم النطاق": domain,
+                "تاريخ التسجيل": str(w.creation_date),
+                "تاريخ الانتهاء": str(w.expiration_date),
+                "المسجل": w.registrar,
+                "الحالة": w.status
+            }
+        except Exception:
+            return {"error": "لا يمكن الحصول على معلومات Whois"}
+
+    def _extract_cert_common_name(self, cert):
+        """Return the subject common name of a ``getpeercert()`` dict.
+
+        The ``subject`` entry is a tuple of relative distinguished names,
+        each itself a tuple of ``(name, value)`` pairs, so it has to be
+        walked rather than queried like a dict.
+
+        Returns:
+            The common name, or ``'Unknown'`` when the subject has none.
+        """
+        for rdn in cert.get('subject', ()):
+            for name, value in rdn:
+                if name == 'commonName':
+                    return value
+        return 'Unknown'
+
+    def _check_ssl(self, url):
+        """Open a TLS connection to the host of ``url`` and describe it.
+
+        Returns:
+            On success, a dict with a "secure" status, the certificate's
+            common name, its validity dates, the cipher name and the key
+            strength in bits. On any failure, a dict with an "insecure"
+            status and a reason.
+        """
+        try:
+            parsed = urlparse(url)
+            # hostname drops any ":port" suffix that netloc carries; passing
+            # netloc would break both the connection and the SNI name.
+            host = parsed.hostname
+            if not host:
+                raise ValueError("URL has no host")
+            # Honour an explicit port only for https URLs; otherwise probe
+            # the standard HTTPS port.
+            port = parsed.port if parsed.scheme == 'https' and parsed.port else 443
+            context = ssl.create_default_context()
+            # The timeout keeps an unreachable host from blocking forever.
+            with socket.create_connection((host, port), timeout=10) as sock:
+                with context.wrap_socket(sock, server_hostname=host) as ssock:
+                    cert = ssock.getpeercert()
+                    cipher_name, _protocol, secret_bits = ssock.cipher()
+                    return {
+                        "الحالة": "آمن ✅",
+                        "نوع الشهادة": self._extract_cert_common_name(cert),
+                        "تاريخ الإصدار": cert.get('notBefore', 'Unknown'),
+                        "تاريخ الانتهاء": cert.get('notAfter', 'Unknown'),
+                        "الخوارزمية": cipher_name,
+                        "قوة التشفير": f"{secret_bits} bits"
+                    }
+        except Exception:
+            return {
+                "الحالة": "غير آمن ❌",
+                "السبب": "لا يوجد شهادة SSL صالحة"
+            }
+
+    def _check_security_headers(self, url):
+        """Fetch ``url`` and report which security headers it sends.
+
+        Returns:
+            A dict keyed by a short header label; each value records whether
+            the header is present and its value. When the request fails, a
+            dict with a single ``"error"`` message is returned.
+        """
+        try:
+            # The timeout keeps an unresponsive server from blocking forever.
+            response = requests.get(url, timeout=10)
+            headers = response.headers
+            security_headers = {
+                'Strict-Transport-Security': 'HSTS',
+                'Content-Security-Policy': 'CSP',
+                'X-Frame-Options': 'X-Frame',
+                'X-Content-Type-Options': 'X-Content-Type',
+                'X-XSS-Protection': 'XSS Protection'
+            }
+            results = {}
+            for header, name in security_headers.items():
+                results[name] = {
+                    "موجود": header in headers,
+                    "القيمة": headers.get(header, "غير موجود")
+                }
+            return results
+        except Exception:
+            return {"error": "فشل فحص headers الأمان"}
+
+    def _check_security_risks(self, url, ssl_info=None, headers=None):
+        """Collect the security risks detected for ``url``.
+
+        Args:
+            url: Address of the site to inspect.
+            ssl_info: Optional result of ``_check_ssl(url)``; computed here
+                when omitted.
+            headers: Optional result of ``_check_security_headers(url)``;
+                computed here when omitted.
+
+        Returns:
+            A dict with the list of risks, their count and the overall
+            severity: high if any risk is high, medium if there is any other
+            risk, low otherwise.
+        """
+        risks = []
+        # Plain HTTP check
+        if not url.startswith('https'):
+            risks.append({
+                "المستوى": "عالي",
+                "النوع": "بروتوكول غير آمن",
+                "الوصف": "الموقع يستخدم HTTP بدلاً من HTTPS"
+            })
+        # SSL certificate check
+        if ssl_info is None:
+            ssl_info = self._check_ssl(url)
+        if ssl_info.get("الحالة") == "غير آمن ❌":
+            risks.append({
+                "المستوى": "عالي",
+                "النوع": "شهادة SSL",
+                "الوصف": "شهادة SSL غير صالحة أو منتهية"
+            })
+        # Security headers check. A failed header fetch has no "HSTS" entry
+        # and is therefore reported as missing HSTS.
+        if headers is None:
+            headers = self._check_security_headers(url)
+        if isinstance(headers, dict) and not headers.get("HSTS", {}).get("موجود"):
+            risks.append({
+                "المستوى": "متوسط",
+                "النوع": "HSTS غير مفعل",
+                "الوصف": "عدم وجود حماية النقل الآمن الصارم"
+            })
+        return {
+            "المخاطر المكتشفة": risks,
+            "عدد المخاطر": len(risks),
+            "مستوى الخطورة": "عالي" if any(r["المستوى"] == "عالي" for r in risks) else "متوسط" if risks else "منخفض"
+        }
+
+    def _calculate_security_score(self, url, ssl_info=None, headers=None):
+        """Score the security of ``url`` on a scale from 0 to 100.
+
+        Points are deducted for plain HTTP (30), an invalid certificate
+        (25), each missing security header (5) and each detected risk (10).
+
+        Args:
+            url: Address of the site to inspect.
+            ssl_info: Optional result of ``_check_ssl(url)``; computed here
+                when omitted.
+            headers: Optional result of ``_check_security_headers(url)``;
+                computed here when omitted.
+
+        Returns:
+            The score, never below 0.
+        """
+        score = 100
+        # HTTPS check
+        if not url.startswith('https'):
+            score -= 30
+        # SSL check
+        if ssl_info is None:
+            ssl_info = self._check_ssl(url)
+        if ssl_info.get("الحالة") == "غير آمن ❌":
+            score -= 25
+        # Headers check. The error dict of a failed fetch holds a message
+        # string, not per-header results, so it must not be iterated.
+        if headers is None:
+            headers = self._check_security_headers(url)
+        if isinstance(headers, dict) and "error" not in headers:
+            for header_info in headers.values():
+                if not header_info.get("موجود"):
+                    score -= 5
+        # Detected risks
+        risks = self._check_security_risks(url, ssl_info, headers)
+        score -= (risks.get("عدد المخاطر", 0) * 10)
+        return max(0, score)
+
+    def _get_security_recommendations(self, url, ssl_info=None, headers=None):
+        """List security recommendations for ``url``.
+
+        Args:
+            url: Address of the site to inspect.
+            ssl_info: Optional result of ``_check_ssl(url)``; computed here
+                when omitted.
+            headers: Optional result of ``_check_security_headers(url)``;
+                computed here when omitted.
+
+        Returns:
+            A list of dicts, each with a problem, a solution and a priority.
+            When nothing is found, a single "no obvious problems" entry is
+            returned.
+        """
+        recommendations = []
+        # HTTPS check
+        if not url.startswith('https'):
+            recommendations.append({
+                "المشكلة": "عدم استخدام HTTPS",
+                "الحل": "قم بتفعيل HTTPS وتثبيت شهادة SSL",
+                "الأولوية": "عالية"
+            })
+        # SSL check
+        if ssl_info is None:
+            ssl_info = self._check_ssl(url)
+        if ssl_info.get("الحالة") == "غير آمن ❌":
+            recommendations.append({
+                "المشكلة": "شهادة SSL غير صالحة",
+                "الحل": "قم بتجديد أو تثبيت شهادة SSL جديدة",
+                "الأولوية": "عالية"
+            })
+        # Headers check. The error dict of a failed fetch holds a message
+        # string, not per-header results, so it must not be iterated.
+        if headers is None:
+            headers = self._check_security_headers(url)
+        if isinstance(headers, dict) and "error" not in headers:
+            for name, info in headers.items():
+                if not info.get("موجود"):
+                    recommendations.append({
+                        "المشكلة": f"عدم وجود {name}",
+                        "الحل": f"قم بإضافة header الأمان {name}",
+                        "الأولوية": "متوسطة"
+                    })
+        return recommendations if recommendations else [
+            {
+                "المشكلة": "لا توجد مشاكل أمنية واضحة",
+                "الحل": "استمر في مراقبة وتحديث إعدادات الأمان",
+                "الأولوية": "منخفضة"
+            }
+        ]