dofbi committed
Commit a7754d8 · verified · 1 Parent(s): ad22f4e

Create api-article-aps.py

Files changed (1)
  1. api-article-aps.py +90 -0
api-article-aps.py ADDED
@@ -0,0 +1,90 @@
+ from flask import Flask, jsonify
+ from bs4 import BeautifulSoup
+ from datetime import datetime
+ from selenium import webdriver
+ from selenium.webdriver.chrome.options import Options
+ import time
+ import re
+
+ app = Flask(__name__)
+
+ # French keywords used to filter election-related articles
+ KEYWORDS = ["élection présidentielle", "présidentielle", "élections présidentielles",
+             "élection législative", "législative", "élections législatives"]
+
+ START_URL = "https://aps.sn/politique/"
+
+ def clean_text(text):
+     # Collapse runs of whitespace into single spaces
+     if not text:
+         return ""
+     return re.sub(r'\s+', ' ', text).strip()
+
+ def fetch_election_articles():
+     options = Options()
+     options.add_argument("--headless")
+     options.add_argument("--disable-gpu")
+     options.add_argument("--no-sandbox")
+     driver = webdriver.Chrome(options=options)
+
+     results = []
+     try:
+         driver.get(START_URL)
+         time.sleep(5)  # let the JavaScript-rendered listing load
+         soup = BeautifulSoup(driver.page_source, "html.parser")
+         article_links = soup.select("h6.p-ttl a.ttl-link") or soup.select("a.ttl-link")
+
+         for link in article_links[:50]:
+             url = link.get("href")
+             title = clean_text(link.text)
+             if not url:
+                 continue
+
+             try:
+                 driver.get(url)
+                 time.sleep(3)
+                 article_soup = BeautifulSoup(driver.page_source, "html.parser")
+
+                 # Extract the article body, trying theme-specific selectors first
+                 selectors = [
+                     "div.td-post-content", "div.elementor-widget-theme-post-content",
+                     "div.content-inner", "div.post-content", "article",
+                     "main .entry-content", ".post-content-wrap"
+                 ]
+                 content_tag = None
+                 for selector in selectors:
+                     content_tag = article_soup.select_one(selector)
+                     if content_tag and len(content_tag.text.strip()) > 100:
+                         break
+                 content = clean_text(content_tag.text if content_tag else "")
+                 if not content:
+                     # Fall back to collecting all substantial paragraphs
+                     paragraphs = article_soup.select("p")
+                     content = "\n\n".join([p.text for p in paragraphs if len(p.text.strip()) > 50])
+
+                 if not content or len(content) < 100:
+                     continue
+
+                 combined_text = f"{title.lower()} {content.lower()}"
+                 if any(kw in combined_text for kw in KEYWORDS):
+                     description = content.split("\n")[0] if "\n" in content else content[:200]
+                     results.append({
+                         "title": title,
+                         "description": description,
+                         "content": content,
+                         "url": url
+                     })
+
+             except Exception as e:
+                 print(f"Error for {title}: {e}")
+     finally:
+         # Always release the browser, even if scraping fails partway through
+         driver.quit()
+
+     return results
+
+ @app.route('/', methods=['GET'])
+ def index():
+     # Simple landing message pointing to the actual API endpoint
+     return "Election articles API. Use /api/articles-aps to fetch the data."
+
+ @app.route('/api/articles-aps', methods=['GET'])
+ def get_election_articles():
+     articles = fetch_election_articles()
+     return jsonify(articles)
+
+ if __name__ == '__main__':
+     app.run(host="0.0.0.0", debug=True, port=5001)
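
Running the file starts a Flask development server on port 5001 (it requires flask, beautifulsoup4, selenium, and a matching Chrome/chromedriver install). A minimal sketch of querying the endpoint from Python, assuming the app is running locally and the requests package is available; the generous timeout is an assumption reflecting that each request triggers a full Selenium crawl:

import requests

# Hypothetical client-side check; assumes the Flask app above is running on localhost:5001
resp = requests.get("http://localhost:5001/api/articles-aps", timeout=600)
resp.raise_for_status()
for article in resp.json():
    print(article["title"], "->", article["url"])

Because fetch_election_articles launches a fresh headless browser and sleeps between page loads for up to 50 articles, a response can take several minutes; caching results or moving the scrape to a background job would keep the endpoint responsive.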