import logging
import os
import re
import shutil
import tempfile
import zipfile
from urllib.parse import urljoin

import gradio as gr
import requests


# Seconds allowed for any single HTTP request; without an explicit timeout
# `requests` waits indefinitely on an unresponsive server.
_REQUEST_TIMEOUT = 10

# Headers sent with every request: a desktop-browser User-Agent and a
# radiofrance.fr Referer.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://www.radiofrance.fr/',
}

# Patterns are compiled once instead of on every call.
_SCRIPT_BODY_RE = re.compile(r'<script.*?>(.*?)</script>', re.DOTALL)
_SCRIPT_MP3_RE = re.compile(r'(https://[^\s"\']+?\.mp3)')
_ATTR_MP3_RE = re.compile(
    r'(?:href|src|rl|contentUrl)\s*=\s*["\'](.*?\.mp3.*?)["\']'
)

_logger = logging.getLogger(__name__)


def _remove_quietly(path):
    """Delete *path*, ignoring filesystem errors (e.g. it never existed)."""
    try:
        os.remove(path)
    except OSError:
        pass


def _extract_mp3_links(html, base_url):
    """Return the unique MP3 URLs found in *html*, in discovery order.

    Absolute ``https://...mp3`` URLs inside ``<script>`` bodies come first,
    followed by attribute values containing ``.mp3`` (resolved against
    *base_url*). Query strings and trailing ``";`` residue are stripped
    before de-duplication.
    """
    candidates = []
    for script_body in _SCRIPT_BODY_RE.findall(html):
        candidates.extend(_SCRIPT_MP3_RE.findall(script_body))
    candidates.extend(
        urljoin(base_url, match.split('";')[0])
        for match in _ATTR_MP3_RE.findall(html)
    )
    cleaned = (link.split('?')[0].split('";')[0] for link in candidates)
    # dict.fromkeys keeps the first occurrence of each link, in order.
    return list(dict.fromkeys(cleaned))


def _download_file(file_url, dest_path):
    """Stream *file_url* to *dest_path*; return True on success.

    Downloads are best-effort: any failure is logged, the partial file is
    removed, and False is returned so the caller can move on.
    """
    try:
        with requests.get(
            file_url,
            headers=_REQUEST_HEADERS,
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as resp:
            resp.raise_for_status()
            with open(dest_path, 'wb') as out:
                for chunk in resp.iter_content(chunk_size=8192):
                    out.write(chunk)
    except Exception as exc:
        _logger.warning("Skipping %s: %s", file_url, exc)
        _remove_quietly(dest_path)
        return False
    return True


def process_url(url):
    """Download every MP3 referenced by the page at *url* into one ZIP.

    Args:
        url: Address of the page to scan. Surrounding whitespace is
            ignored and ``None`` is treated as an empty string.

    Returns:
        ``(zip_path, None)`` on success, where *zip_path* points to
        ``radiofrance_podcast.zip`` inside a fresh temporary directory, or
        ``(None, message)`` when the page cannot be fetched, contains no
        MP3 link, or none of the downloads succeeds.

    Raises:
        Exception: whatever ``zipfile`` raises if the archive cannot be
            written; the temporary directory is removed first.
    """
    url = (url or "").strip()

    # Boundary handler: every failure while fetching the page is reported
    # to the caller as a message rather than raised.
    try:
        response = requests.get(
            url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT
        )
        response.raise_for_status()
    except Exception as e:
        return None, f"Erreur de connexion : {str(e)}"

    links = _extract_mp3_links(response.text, url)
    if not links:
        return None, "Aucun MP3 trouvé - Essayez avec l'URL complète d'une série"

    temp_dir = tempfile.mkdtemp()
    downloaded = []
    for idx, mp3_url in enumerate(links, 1):
        # The numeric prefix keeps names unique and in discovery order.
        filepath = os.path.join(
            temp_dir, f"{idx:02d}_{os.path.basename(mp3_url)}"
        )
        if _download_file(mp3_url, filepath):
            downloaded.append(filepath)

    if not downloaded:
        # Nothing to return, so do not leave the empty directory behind.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "Tous les téléchargements ont échoué"

    zip_path = os.path.join(temp_dir, 'radiofrance_podcast.zip')
    try:
        with zipfile.ZipFile(zip_path, 'w') as archive:
            for path in downloaded:
                archive.write(path, arcname=os.path.basename(path))
    except Exception:
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # The MP3s now live inside the archive; drop the loose copies so only
    # the ZIP remains on disk.
    for path in downloaded:
        _remove_quietly(path)

    return zip_path, None


def download_podcast(url):
    """Gradio click handler: build the podcast archive for *url*.

    Delegates to ``process_url`` and returns the ZIP path it produced.

    Raises:
        gr.Error: carrying the message from ``process_url`` when it
            reports one.
    """
    archive_path, failure = process_url(url)
    if not failure:
        return archive_path
    raise gr.Error(failure)


# Single-page Gradio interface: a URL field and a button that trigger
# `download_podcast`, plus a file component that receives the ZIP.
# NOTE(review): the original indentation was lost; the button is assumed to
# share the row with the textbox and the other components to sit directly
# under the Blocks root — confirm against the intended layout.
with gr.Blocks(title="RadioFrance Podcaster") as app:
    gr.Markdown("## 🎧 Téléchargement de podcasts Radio France")

    with gr.Row():
        url_input = gr.Textbox(
            max_lines=1,
            placeholder="Ex: https://www.radiofrance.fr/.../mon-podcast",
            label="URL de la série podcast",
        )
        btn = gr.Button("Télécharger les épisodes", variant="primary")

    output = gr.File(label="Fichier ZIP contenant les MP3")

    # One clickable sample that fills the URL field.
    examples = gr.Examples(
        label="Exemple fonctionnel",
        inputs=[url_input],
        examples=[
            ["https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"],
        ],
    )

    # Wire the button to the handler; also exposed under the API name
    # "download".
    btn.click(
        download_podcast,
        inputs=url_input,
        outputs=output,
        api_name="download",
    )

app.launch(share=False, show_error=True)