Spaces:

joermd
/

fpurl

Sleeping

App Files Community

joermd committed on Nov 18, 2024

Commit

0a285a8

verified ·

1 Parent(s): 1148762

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -152

app.py CHANGED Viewed

@@ -374,164 +374,165 @@ class AdvancedWebsiteAnalyzer:
             "الروابط المكسورة": broken_links
         }
-   def _analyze_content(self, soup):
-    """
-    Analyzes webpage content for SEO factors
-    """
-    try:
-        # Extract all text content
-        text_content = ' '.join([p.text.strip() for p in soup.find_all(['p', 'div', 'article', 'section'])])
-        # Analyze headings hierarchy
-        headings = {f'h{i}': len(soup.find_all(f'h{i}')) for i in range(1, 7)}
-        # Calculate word count
-        words = text_content.split()
-        word_count = len(words)
-        # Calculate readability score
-        readability_score = self._calculate_readability(text_content)
-        # Analyze keyword density
-        keyword_density = self._calculate_keyword_density(text_content)
-        # Check for images with alt text
-        images = soup.find_all('img')
-        images_with_alt = len([img for img in images if img.get('alt')])
-        # Calculate content quality score
-        quality_score = self._calculate_content_quality_score(
-            word_count,
-            readability_score,
-            images_with_alt,
-            len(images),
-            headings
-        )
-        return {
-            "إحصائيات المحتوى": {
-                "عدد الكلمات": word_count,
-                "مستوى القراءة": readability_score,
-                "نسبة الصور مع نص بديل": f"{(images_with_alt/len(images)*100 if images else 0):.1f}%",
-                "توزيع العناوين": headings,
-            },
-            "تحليل الكلمات المفتاحية": {
-                "كثافة الكلمات الرئيسية": keyword_density,
-                "الكلمات الأكثر تكراراً": self._get_top_words(text_content, 5)
-            },
-            "تقييم جودة المحتوى": {
-                "الدرجة": quality_score,
-                "التقييم": self._get_content_rating(quality_score),
-                "التوصيات": self._get_content_recommendations(
-                    word_count,
-                    readability_score,
-                    images_with_alt,
-                    len(images),
-                    headings
-                )
-            }
-        }
-    except Exception as e:
-        return {"error": f"خطأ في تحليل المحتوى: {str(e)}"}
-def _calculate_content_quality_score(self, word_count, readability, alt_images, total_images, headings):
-    """
-    Calculates a content quality score based on various factors
-    """
-    score = 100
-    # Word count scoring
-    if word_count < 300:
-        score -= 20
-    elif word_count < 600:
-        score -= 10
-    # Readability scoring
-    if readability < 40:
-        score -= 15
-    elif readability < 60:
-        score -= 10
-    # Image alt text scoring
-    if total_images > 0:
-        alt_ratio = alt_images / total_images
-        if alt_ratio < 0.5:
-            score -= 15
-        elif alt_ratio < 0.8:
-            score -= 10
-    # Heading hierarchy scoring
-    if headings.get('h1', 0) == 0:
-        score -= 10
-    if headings.get('h1', 0) > 1:
-        score -= 5
-    if headings.get('h2', 0) == 0:
-        score -= 5
-    return max(0, score)
-def _get_content_rating(self, score):
-    """
-    Converts numerical score to qualitative rating
-    """
-    if score >= 90:
-        return "ممتاز"
-    elif score >= 80:
-        return "جيد جداً"
-    elif score >= 70:
-        return "جيد"
-    elif score >= 60:
-        return "مقبول"
-    else:
-        return "يحتاج تحسين"
-def _get_content_recommendations(self, word_count, readability, alt_images, total_images, headings):
-    """
-    Generates content improvement recommendations
-    """
-    recommendations = []
-    if word_count < 300:
-        recommendations.append({
-            "المشكلة": "محتوى قصير جداً",
-            "الحل": "زيادة المحتوى إلى 300 كلمة على الأقل",
-            "الأولوية": "عالية"
-        })
-    if readability < 60:
-        recommendations.append({
-            "المشكلة": "صعوبة قراءة المحتوى",
-            "الحل": "تبسيط الجمل واستخدام لغة أسهل",
-            "الأولوية": "متوسطة"
-        })
-    if total_images > 0 and (alt_images / total_images) < 0.8:
-        recommendations.append({
-            "المشكلة": "نقص في النصوص البديلة للصور",
-            "الحل": "إضافة نص بديل وصفي لجميع الصور",
-            "الأولوية": "عالية"
-        })
-    if headings.get('h1', 0) != 1:
-        recommendations.append({
-            "المشكلة": "عدد غير مناسب من عناوين H1",
-            "الحل": "استخدام عنوان H1 واحد فقط للصفحة",
-            "الأولوية": "عالية"
-        })
-    return recommendations if recommendations else [{
-        "المشكلة": "لا توجد مشاكل واضحة",
-        "الحل": "الاستمرار في تحديث المحتوى بشكل دوري",
-        "الأولوية": "منخفضة"
-    }]
-def _get_top_words(self, text, count=5):
-    """
-    Gets the most frequent meaningful words in the content
-    """
-    # Remove common Arabic and English stop words
-    stop_words = set(['و', 'في', 'من', 'على', 'the', 'and', 'in', 'of', 'to'])
-    words = text.lower().split()
-    word_freq = Counter(word for word in words if word not in stop_words and len(word) > 2)
-    return {word: count for word, count in word_freq.most_common(count)}

             "الروابط المكسورة": broken_links
         }
+   class SEOAnalyzer:
+    def _analyze_content(self, soup):
+        """
+        Analyzes webpage content for SEO factors
+        """
+        try:
+            # Extract all text content
+            text_content = ' '.join([p.text.strip() for p in soup.find_all(['p', 'div', 'article', 'section'])])
+            # Analyze headings hierarchy
+            headings = {f'h{i}': len(soup.find_all(f'h{i}')) for i in range(1, 7)}
+            # Calculate word count
+            words = text_content.split()
+            word_count = len(words)
+            # Calculate readability score
+            readability_score = self._calculate_readability(text_content)
+            # Analyze keyword density
+            keyword_density = self._calculate_keyword_density(text_content)
+            # Check for images with alt text
+            images = soup.find_all('img')
+            images_with_alt = len([img for img in images if img.get('alt')])
+            # Calculate content quality score
+            quality_score = self._calculate_content_quality_score(
+                word_count,
+                readability_score,
+                images_with_alt,
+                len(images),
+                headings
+            )
+            return {
+                "إحصائيات المحتوى": {
+                    "عدد الكلمات": word_count,
+                    "مستوى القراءة": readability_score,
+                    "نسبة الصور مع نص بديل": f"{(images_with_alt/len(images)*100 if images else 0):.1f}%",
+                    "توزيع العناوين": headings,
+                },
+                "تحليل الكلمات المفتاحية": {
+                    "كثافة الكلمات الرئيسية": keyword_density,
+                    "الكلمات الأكثر تكراراً": self._get_top_words(text_content, 5)
+                },
+                "تقييم جودة المحتوى": {
+                    "الدرجة": quality_score,
+                    "التقييم": self._get_content_rating(quality_score),
+                    "التوصيات": self._get_content_recommendations(
+                        word_count,
+                        readability_score,
+                        images_with_alt,
+                        len(images),
+                        headings
+                    )
+                }
+            }
+        except Exception as e:
+            return {"error": f"خطأ في تحليل المحتوى: {str(e)}"}
+    def _calculate_content_quality_score(self, word_count, readability, alt_images, total_images, headings):
+        """
+        Calculates a content quality score based on various factors
+        """
+        score = 100
+        # Word count scoring
+        if word_count < 300:
+            score -= 20
+        elif word_count < 600:
+            score -= 10
+        # Readability scoring
+        if readability < 40:
+            score -= 15
+        elif readability < 60:
+            score -= 10
+        # Image alt text scoring
+        if total_images > 0:
+            alt_ratio = alt_images / total_images
+            if alt_ratio < 0.5:
+                score -= 15
+            elif alt_ratio < 0.8:
+                score -= 10
+        # Heading hierarchy scoring
+        if headings.get('h1', 0) == 0:
+            score -= 10
+        if headings.get('h1', 0) > 1:
+            score -= 5
+        if headings.get('h2', 0) == 0:
+            score -= 5
+        return max(0, score)
+    def _get_content_rating(self, score):
+        """
+        Converts numerical score to qualitative rating
+        """
+        if score >= 90:
+            return "ممتاز"
+        elif score >= 80:
+            return "جيد جداً"
+        elif score >= 70:
+            return "جيد"
+        elif score >= 60:
+            return "مقبول"
+        else:
+            return "يحتاج تحسين"
+    def _get_content_recommendations(self, word_count, readability, alt_images, total_images, headings):
+        """
+        Generates content improvement recommendations
+        """
+        recommendations = []
+        if word_count < 300:
+            recommendations.append({
+                "المشكلة": "محتوى قصير جداً",
+                "الحل": "زيادة المحتوى إلى 300 كلمة على الأقل",
+                "الأولوية": "عالية"
+            })
+        if readability < 60:
+            recommendations.append({
+                "المشكلة": "صعوبة قراءة المحتوى",
+                "الحل": "تبسيط الجمل واستخدام لغة أسهل",
+                "الأولوية": "متوسطة"
+            })
+        if total_images > 0 and (alt_images / total_images) < 0.8:
+            recommendations.append({
+                "المشكلة": "نقص في النصوص البديلة للصور",
+                "الحل": "إضافة نص بديل وصفي لجميع الصور",
+                "الأولوية": "عالية"
+            })
+        if headings.get('h1', 0) != 1:
+            recommendations.append({
+                "المشكلة": "عدد غير مناسب من عناوين H1",
+                "الحل": "استخدام عنوان H1 واحد فقط للصفحة",
+                "الأولوية": "عالية"
+            })
+        return recommendations if recommendations else [{
+            "المشكلة": "لا توجد مشاكل واضحة",
+            "الحل": "الاستمرار في تحديث المحتوى بشكل دوري",
+            "الأولوية": "منخفضة"
+        }]
+    def _get_top_words(self, text, count=5):
+        """
+        Gets the most frequent meaningful words in the content
+        """
+        # Remove common Arabic and English stop words
+        stop_words = set(['و', 'في', 'من', 'على', 'the', 'and', 'in', 'of', 'to'])
+        words = text.lower().split()
+        word_freq = Counter(word for word in words if word not in stop_words and len(word) > 2)
+        return {word: count for word, count in word_freq.most_common(count)}