# audit_api / api-article-aps.py
# dofbi's picture
# Create api-article-aps.py
# a7754d8 verified
import os
import re
import time
from datetime import datetime
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from flask import Flask, jsonify
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Flask application object; the @app.route decorators in this file register their views on it.
app = Flask(__name__)
# Lowercase French phrases; an article is kept when any of them occurs as a
# substring of its lowercased title or content.
KEYWORDS = [
    "élection présidentielle",
    "présidentielle",
    "élections présidentielles",
    "élection législative",
    "législative",
    "élections législatives",
]

# Listing page from which the article links are collected.
START_URL = "https://aps.sn/politique/"
def clean_text(text):
    """Collapse each run of whitespace in *text* to one space and trim both ends.

    Falsy input (such as ``None`` or an empty string) yields an empty string.
    """
    return re.sub(r'\s+', ' ', text).strip() if text else ""
def _build_headless_driver():
    """Return a Chrome WebDriver started with the headless scraping flags."""
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    return webdriver.Chrome(options=options)


def _extract_article_content(article_soup):
    """Return the article body text found in *article_soup*, or "" if none.

    Known content containers are tried in order and the first one holding
    more than 100 characters of text wins. When no container qualifies, the
    page's longer ``<p>`` elements are joined with blank lines instead.
    """
    selectors = (
        "div.td-post-content", "div.elementor-widget-theme-post-content",
        "div.content-inner", "div.post-content", "article",
        "main .entry-content", ".post-content-wrap",
    )
    for selector in selectors:
        tag = article_soup.select_one(selector)
        # Only a container with enough text is accepted; a short or missing
        # match must not block the paragraph fallback below.
        if tag is not None and len(tag.text.strip()) > 100:
            return clean_text(tag.text)

    paragraphs = article_soup.select("p")
    return "\n\n".join(
        clean_text(p.text) for p in paragraphs if len(p.text.strip()) > 50
    )


def fetch_election_articles():
    """Scrape the listing at START_URL and return the election-related articles.

    Up to the first 50 article links on the listing page are considered.
    Each distinct link is opened, its body text extracted, and the article is
    kept when any entry of KEYWORDS occurs in its lowercased title or content.

    Returns:
        A list of dicts with the keys ``"title"``, ``"description"``,
        ``"content"`` and ``"url"``.

    Raises:
        Whatever the WebDriver raises while starting or while loading the
        listing page. Failures on individual articles are printed and skipped.
    """
    driver = _build_headless_driver()
    try:
        driver.get(START_URL)
        time.sleep(5)  # fixed pause before reading the rendered page source
        soup = BeautifulSoup(driver.page_source, "html.parser")
        article_links = soup.select("h6.p-ttl a.ttl-link") or soup.select("a.ttl-link")

        results = []
        seen_urls = set()
        for link in article_links[:50]:
            href = link.get("href")
            if not href:
                # An anchor without a target cannot be visited.
                continue
            # Resolve relative links against the listing page; absolute
            # links are returned unchanged by urljoin.
            url = urljoin(START_URL, href)
            if url in seen_urls:
                # The same article can be linked more than once on the page.
                continue
            seen_urls.add(url)

            title = clean_text(link.text)
            try:
                driver.get(url)
                time.sleep(3)
                article_soup = BeautifulSoup(driver.page_source, "html.parser")

                content = _extract_article_content(article_soup)
                if len(content) < 100:
                    continue

                combined_text = f"{title.lower()} {content.lower()}"
                if not any(kw in combined_text for kw in KEYWORDS):
                    continue

                # Paragraph-joined content contains newlines: use its first
                # paragraph. Container text is single-line: use a 200-char cut.
                description = content.split("\n")[0] if "\n" in content else content[:200]
                results.append({
                    "title": title,
                    "description": description,
                    "content": content,
                    "url": url,
                })
            except Exception as e:
                # Best effort: one failing article must not abort the scrape.
                print(f"Erreur pour {title}: {e}")
        return results
    finally:
        # Always shut the browser down, even if the listing page failed.
        driver.quit()
@app.route('/', methods=['GET'])
def index():
    """Return a plain-text landing message naming the articles endpoint."""
    # The advertised path must match the route registered for
    # get_election_articles ('/api/articles-aps').
    return "API d'articles sur les élections. Utilisez /api/articles-aps pour obtenir les données."
@app.route('/api/articles-aps', methods=['GET'])
def get_election_articles():
    """Run the scraper and return its list of articles as a JSON response."""
    return jsonify(fetch_election_articles())
if __name__ == '__main__':
    # The server listens on every interface, and Flask's debug mode enables an
    # interactive debugger that can execute arbitrary code. Debug is therefore
    # off unless FLASK_DEBUG is explicitly set to a truthy value.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", debug=debug_enabled, port=5001)