Update app.py
Browse files
app.py
CHANGED
@@ -374,164 +374,165 @@ class AdvancedWebsiteAnalyzer:
|
|
374 |
"الروابط المكسورة": broken_links
|
375 |
}
|
376 |
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
384 |
|
385 |
-
#
|
386 |
-
|
|
|
|
|
|
|
387 |
|
388 |
-
#
|
389 |
-
|
390 |
-
|
|
|
|
|
391 |
|
392 |
-
#
|
393 |
-
|
|
|
|
|
|
|
|
|
|
|
394 |
|
395 |
-
#
|
396 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
|
398 |
-
|
399 |
-
|
400 |
-
|
|
|
|
|
|
|
401 |
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
headings
|
409 |
-
)
|
410 |
|
411 |
-
|
412 |
-
|
413 |
-
"
|
414 |
-
"
|
415 |
-
"
|
416 |
-
|
417 |
-
},
|
418 |
-
"تحليل الكلمات المفتاحية": {
|
419 |
-
"كثافة الكلمات الرئيسية": keyword_density,
|
420 |
-
"الكلمات الأكثر تكراراً": self._get_top_words(text_content, 5)
|
421 |
-
},
|
422 |
-
"تقييم جودة المحتوى": {
|
423 |
-
"الدرجة": quality_score,
|
424 |
-
"التقييم": self._get_content_rating(quality_score),
|
425 |
-
"التوصيات": self._get_content_recommendations(
|
426 |
-
word_count,
|
427 |
-
readability_score,
|
428 |
-
images_with_alt,
|
429 |
-
len(images),
|
430 |
-
headings
|
431 |
-
)
|
432 |
-
}
|
433 |
-
}
|
434 |
-
except Exception as e:
|
435 |
-
return {"error": f"خطأ في تحليل المحتوى: {str(e)}"}
|
436 |
-
|
437 |
-
def _calculate_content_quality_score(self, word_count, readability, alt_images, total_images, headings):
|
438 |
-
"""
|
439 |
-
Calculates a content quality score based on various factors
|
440 |
-
"""
|
441 |
-
score = 100
|
442 |
-
|
443 |
-
# Word count scoring
|
444 |
-
if word_count < 300:
|
445 |
-
score -= 20
|
446 |
-
elif word_count < 600:
|
447 |
-
score -= 10
|
448 |
-
|
449 |
-
# Readability scoring
|
450 |
-
if readability < 40:
|
451 |
-
score -= 15
|
452 |
-
elif readability < 60:
|
453 |
-
score -= 10
|
454 |
-
|
455 |
-
# Image alt text scoring
|
456 |
-
if total_images > 0:
|
457 |
-
alt_ratio = alt_images / total_images
|
458 |
-
if alt_ratio < 0.5:
|
459 |
-
score -= 15
|
460 |
-
elif alt_ratio < 0.8:
|
461 |
-
score -= 10
|
462 |
-
|
463 |
-
# Heading hierarchy scoring
|
464 |
-
if headings.get('h1', 0) == 0:
|
465 |
-
score -= 10
|
466 |
-
if headings.get('h1', 0) > 1:
|
467 |
-
score -= 5
|
468 |
-
if headings.get('h2', 0) == 0:
|
469 |
-
score -= 5
|
470 |
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
return
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
elif score >= 60:
|
484 |
-
return "مقبول"
|
485 |
-
else:
|
486 |
-
return "يحتاج تحسين"
|
487 |
-
|
488 |
-
def _get_content_recommendations(self, word_count, readability, alt_images, total_images, headings):
|
489 |
-
"""
|
490 |
-
Generates content improvement recommendations
|
491 |
-
"""
|
492 |
-
recommendations = []
|
493 |
-
|
494 |
-
if word_count < 300:
|
495 |
-
recommendations.append({
|
496 |
-
"المشكلة": "محتوى قصير جداً",
|
497 |
-
"الحل": "زيادة المحتوى إلى 300 كلمة على الأقل",
|
498 |
-
"الأولوية": "عالية"
|
499 |
-
})
|
500 |
-
|
501 |
-
if readability < 60:
|
502 |
-
recommendations.append({
|
503 |
-
"المشكلة": "صعوبة قراءة المحتوى",
|
504 |
-
"الحل": "تبسيط الجمل واستخدام لغة أسهل",
|
505 |
-
"الأولوية": "متوسطة"
|
506 |
-
})
|
507 |
-
|
508 |
-
if total_images > 0 and (alt_images / total_images) < 0.8:
|
509 |
-
recommendations.append({
|
510 |
-
"المشكلة": "نقص في النصوص البديلة للصور",
|
511 |
-
"الحل": "إضافة نص بديل وصفي لجميع الصور",
|
512 |
-
"الأولوية": "عالية"
|
513 |
-
})
|
514 |
-
|
515 |
-
if headings.get('h1', 0) != 1:
|
516 |
-
recommendations.append({
|
517 |
-
"المشكلة": "عدد غير مناسب من عناوين H1",
|
518 |
-
"الحل": "استخدام عنوان H1 واحد فقط للصفحة",
|
519 |
-
"الأولوية": "عالية"
|
520 |
-
})
|
521 |
-
|
522 |
-
return recommendations if recommendations else [{
|
523 |
-
"المشكلة": "لا توجد مشاكل واضحة",
|
524 |
-
"الحل": "الاستمرار في تحديث المحتوى بشكل دوري",
|
525 |
-
"الأولوية": "منخفضة"
|
526 |
-
}]
|
527 |
|
528 |
-
def _get_top_words(self, text, count=5):
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
|
|
374 |
"الروابط المكسورة": broken_links
|
375 |
}
|
376 |
|
377 |
+
class SEOAnalyzer:
|
378 |
+
def _analyze_content(self, soup):
    """Analyze a parsed page's content for on-page SEO factors.

    Args:
        soup: Parsed document. Presumably a BeautifulSoup object -- this
            method relies on ``find_all`` on the document and on
            ``find_parent`` / ``get_text`` / ``get`` on the returned
            elements.

    Returns:
        dict: On success, three Arabic-keyed sections: content statistics
        (word count, readability, share of images with alt text, heading
        counts), keyword analysis (density and top five words), and the
        content-quality evaluation (score, rating, recommendations).
        If anything raises, ``{"error": "<message>"}`` is returned instead;
        no exception propagates to the caller.
    """
    text_tags = ['p', 'div', 'article', 'section']
    try:
        # Keep only the outermost text containers. A <p> nested in a <div>
        # is already part of the div's text; joining both would count the
        # same words twice and inflate every downstream metric.
        containers = [
            element for element in soup.find_all(text_tags)
            if element.find_parent(text_tags) is None
        ]
        # Join text nodes with a space so adjacent child blocks do not
        # fuse into a single "word".
        text_content = ' '.join(
            element.get_text(separator=' ', strip=True)
            for element in containers
        )

        # Number of <h1>..<h6> elements, keyed 'h1'..'h6'.
        headings = {f'h{i}': len(soup.find_all(f'h{i}')) for i in range(1, 7)}

        word_count = len(text_content.split())

        readability_score = self._calculate_readability(text_content)
        keyword_density = self._calculate_keyword_density(text_content)

        # An image counts as "with alt" only when the attribute is non-empty.
        images = soup.find_all('img')
        total_images = len(images)
        images_with_alt = sum(1 for img in images if img.get('alt'))
        alt_percentage = images_with_alt / total_images * 100 if total_images else 0

        quality_score = self._calculate_content_quality_score(
            word_count,
            readability_score,
            images_with_alt,
            total_images,
            headings
        )

        return {
            "إحصائيات المحتوى": {
                "عدد الكلمات": word_count,
                "مستوى القراءة": readability_score,
                "نسبة الصور مع نص بديل": f"{alt_percentage:.1f}%",
                "توزيع العناوين": headings,
            },
            "تحليل الكلمات المفتاحية": {
                "كثافة الكلمات الرئيسية": keyword_density,
                "الكلمات الأكثر تكراراً": self._get_top_words(text_content, 5)
            },
            "تقييم جودة المحتوى": {
                "الدرجة": quality_score,
                "التقييم": self._get_content_rating(quality_score),
                "التوصيات": self._get_content_recommendations(
                    word_count,
                    readability_score,
                    images_with_alt,
                    total_images,
                    headings
                )
            }
        }
    except Exception as e:
        # Deliberate catch-all: failures are reported as an error payload
        # in the same dict-shaped result rather than raised.
        return {"error": f"خطأ في تحليل المحتوى: {str(e)}"}
|
437 |
+
|
438 |
+
def _calculate_content_quality_score(self, word_count, readability, alt_images, total_images, headings):
|
439 |
+
"""
|
440 |
+
Calculates a content quality score based on various factors
|
441 |
+
"""
|
442 |
+
score = 100
|
443 |
|
444 |
+
# Word count scoring
|
445 |
+
if word_count < 300:
|
446 |
+
score -= 20
|
447 |
+
elif word_count < 600:
|
448 |
+
score -= 10
|
449 |
|
450 |
+
# Readability scoring
|
451 |
+
if readability < 40:
|
452 |
+
score -= 15
|
453 |
+
elif readability < 60:
|
454 |
+
score -= 10
|
455 |
|
456 |
+
# Image alt text scoring
|
457 |
+
if total_images > 0:
|
458 |
+
alt_ratio = alt_images / total_images
|
459 |
+
if alt_ratio < 0.5:
|
460 |
+
score -= 15
|
461 |
+
elif alt_ratio < 0.8:
|
462 |
+
score -= 10
|
463 |
|
464 |
+
# Heading hierarchy scoring
|
465 |
+
if headings.get('h1', 0) == 0:
|
466 |
+
score -= 10
|
467 |
+
if headings.get('h1', 0) > 1:
|
468 |
+
score -= 5
|
469 |
+
if headings.get('h2', 0) == 0:
|
470 |
+
score -= 5
|
471 |
+
|
472 |
+
return max(0, score)
|
473 |
+
|
474 |
+
def _get_content_rating(self, score):
|
475 |
+
"""
|
476 |
+
Converts numerical score to qualitative rating
|
477 |
+
"""
|
478 |
+
if score >= 90:
|
479 |
+
return "ممتاز"
|
480 |
+
elif score >= 80:
|
481 |
+
return "جيد جداً"
|
482 |
+
elif score >= 70:
|
483 |
+
return "جيد"
|
484 |
+
elif score >= 60:
|
485 |
+
return "مقبول"
|
486 |
+
else:
|
487 |
+
return "يحتاج تحسين"
|
488 |
+
|
489 |
+
def _get_content_recommendations(self, word_count, readability, alt_images, total_images, headings):
|
490 |
+
"""
|
491 |
+
Generates content improvement recommendations
|
492 |
+
"""
|
493 |
+
recommendations = []
|
494 |
|
495 |
+
if word_count < 300:
|
496 |
+
recommendations.append({
|
497 |
+
"المشكلة": "محتوى قصير جداً",
|
498 |
+
"الحل": "زيادة المحتوى إلى 300 كلمة على الأقل",
|
499 |
+
"الأولوية": "عالية"
|
500 |
+
})
|
501 |
|
502 |
+
if readability < 60:
|
503 |
+
recommendations.append({
|
504 |
+
"المشكلة": "صعوبة قراءة المحتوى",
|
505 |
+
"الحل": "تبسيط الجمل واستخدام لغة أسهل",
|
506 |
+
"الأولوية": "متوسطة"
|
507 |
+
})
|
|
|
|
|
508 |
|
509 |
+
if total_images > 0 and (alt_images / total_images) < 0.8:
|
510 |
+
recommendations.append({
|
511 |
+
"المشكلة": "نقص في النصوص البديلة للصور",
|
512 |
+
"الحل": "إضافة نص بديل وصفي لجميع الصور",
|
513 |
+
"الأولوية": "عالية"
|
514 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
515 |
|
516 |
+
if headings.get('h1', 0) != 1:
|
517 |
+
recommendations.append({
|
518 |
+
"المشكلة": "عدد غير مناسب من عناوين H1",
|
519 |
+
"الحل": "استخدام عنوان H1 واحد فقط للصفحة",
|
520 |
+
"الأولوية": "عالية"
|
521 |
+
})
|
522 |
+
|
523 |
+
return recommendations if recommendations else [{
|
524 |
+
"المشكلة": "لا توجد مشاكل واضحة",
|
525 |
+
"الحل": "الاستمرار في تحديث المحتوى بشكل دوري",
|
526 |
+
"الأولوية": "منخفضة"
|
527 |
+
}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
528 |
|
529 |
+
def _get_top_words(self, text, count=5):
|
530 |
+
"""
|
531 |
+
Gets the most frequent meaningful words in the content
|
532 |
+
"""
|
533 |
+
# Remove common Arabic and English stop words
|
534 |
+
stop_words = set(['و', 'في', 'من', 'على', 'the', 'and', 'in', 'of', 'to'])
|
535 |
+
words = text.lower().split()
|
536 |
+
word_freq = Counter(word for word in words if word not in stop_words and len(word) > 2)
|
537 |
+
|
538 |
+
return {word: count for word, count in word_freq.most_common(count)}
|