File size: 3,478 Bytes
eeae908 734bffb c77e282 734bffb 6ca2249 88da9f3 6ca2249 96682d9 88da9f3 2a11a1f 0a6bdb5 88da9f3 c77e282 734bffb 2a11a1f 88da9f3 0a6bdb5 2a11a1f 734bffb 2a11a1f 734bffb 09d051e 88da9f3 0a6bdb5 2a11a1f 6ca2249 2a11a1f 0a6bdb5 2a11a1f 0a6bdb5 2a11a1f 0a6bdb5 88da9f3 734bffb 88da9f3 2a11a1f 734bffb 2a11a1f 0a6bdb5 88da9f3 6ca2249 734bffb 0a6bdb5 734bffb 88da9f3 734bffb 2a11a1f 09d051e 2a11a1f 09d051e 2a11a1f 09d051e 2a11a1f 09d051e 2a11a1f 09d051e 2a11a1f 09d051e 11fd592 2a11a1f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import logging
import os
import re
import shutil
import tempfile
import zipfile
from urllib.parse import urljoin, urlsplit

import gradio as gr
import requests
logger = logging.getLogger(__name__)

# Seconds allowed for each HTTP request. `requests` applies no timeout by
# default, so without this a stalled server would block the call forever.
_REQUEST_TIMEOUT = 10

# Headers sent with every request: a desktop-browser User-Agent and a
# radiofrance.fr Referer.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://www.radiofrance.fr/'
}

# Body of every <script> element (DOTALL so multi-line scripts match).
_SCRIPT_BODY_RE = re.compile(r'<script.*?>(.*?)</script>', re.DOTALL)

# Absolute https MP3 URL embedded in script text.
_SCRIPT_MP3_RE = re.compile(r'(https://[^\s"\']+?\.mp3)')

# Quoted attribute value containing ".mp3". The value is restricted to
# non-quote characters so a match can never run past the closing quote into a
# following attribute. The unanchored `rl` alternative also matches the tail
# of names such as `url=`.
_ATTR_MP3_RE = re.compile(
    r'(?:href|src|rl|contentUrl)\s*=\s*["\']([^"\']*?\.mp3[^"\']*)["\']'
)


def _extract_mp3_links(page_html, base_url):
    """Return the MP3 URLs found in *page_html*, query-less and de-duplicated.

    Links come from two places: absolute https URLs inside <script> bodies,
    and quoted attribute values (resolved against *base_url*). First-seen
    order is preserved.
    """
    candidates = []
    for script_body in _SCRIPT_BODY_RE.findall(page_html):
        candidates.extend(_SCRIPT_MP3_RE.findall(script_body))
    candidates.extend(
        urljoin(base_url, value) for value in _ATTR_MP3_RE.findall(page_html)
    )
    # Drop the query string, then de-duplicate; dict keys keep insertion order.
    without_query = (link.split('?')[0] for link in candidates)
    return list(dict.fromkeys(without_query))


def _episode_filename(index, mp3_url):
    """Build the numbered local file name for the *index*-th episode."""
    # Use only the URL path so a `#fragment` never ends up in the file name;
    # fall back to a generic name if the path has no final component.
    name = os.path.basename(urlsplit(mp3_url).path) or "episode.mp3"
    return f"{index:02d}_{name}"


def _download_mp3(mp3_url, filepath):
    """Stream *mp3_url* into *filepath*; return True on success, else False."""
    try:
        with requests.get(mp3_url, headers=_REQUEST_HEADERS, stream=True,
                          timeout=_REQUEST_TIMEOUT) as r:
            r.raise_for_status()
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except (requests.RequestException, OSError) as exc:
        # Best effort: one failed episode must not abort the whole batch,
        # but the failure is recorded instead of being silently dropped.
        logger.warning("Download failed for %s: %s", mp3_url, exc)
        try:
            # Remove a partially written file so it cannot linger on disk.
            os.remove(filepath)
        except FileNotFoundError:
            pass
        except OSError as cleanup_exc:
            logger.debug("Could not remove %s: %s", filepath, cleanup_exc)
        return False
    return True


def process_url(url):
    """Download every MP3 linked from a page and bundle them into a ZIP.

    Args:
        url: Address of the page to scan for MP3 links.

    Returns:
        A ``(zip_path, error)`` tuple. On success ``zip_path`` is the path of
        the archive inside a fresh temporary directory and ``error`` is
        ``None``. On failure ``zip_path`` is ``None`` and ``error`` is a
        user-facing message (page unreachable, no MP3 found, or every
        download failed).
    """
    # Tolerate surrounding whitespace from a pasted URL and a missing value.
    url = (url or "").strip()
    try:
        response = requests.get(url, headers=_REQUEST_HEADERS,
                                timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur de connexion : {str(e)}"

    clean_links = _extract_mp3_links(response.text, url)
    if not clean_links:
        return None, "Aucun MP3 trouvé - Essayez avec l'URL complète d'une série"

    # Download each episode into a dedicated temporary directory.
    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(clean_links, 1):
        filepath = os.path.join(temp_dir, _episode_filename(idx, mp3_url))
        if _download_mp3(mp3_url, filepath):
            filenames.append(filepath)

    if not filenames:
        # Nothing will be returned from this directory, so do not leak it.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "Tous les téléchargements ont échoué"

    # Build the archive next to the downloaded files (default: stored,
    # no compression).
    zip_path = os.path.join(temp_dir, 'radiofrance_podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in filenames:
            zipf.write(file, arcname=os.path.basename(file))
    return zip_path, None
def download_podcast(url):
    """Gradio callback: return the ZIP path for *url* or raise a UI error.

    Delegates to ``process_url`` and converts its error message, when one is
    returned, into a ``gr.Error``.
    """
    archive_path, failure = process_url(url)
    if not failure:
        return archive_path
    raise gr.Error(failure)
# Gradio interface: one URL field, a button that triggers the download, and a
# file component that receives the resulting ZIP.
# NOTE(review): the original indentation was lost; the nesting below (Row
# wrapping only the textbox, launch outside the Blocks context) is the
# conventional layout and should be confirmed against the deployed app.
with gr.Blocks(title="RadioFrance Podcaster") as app:
    gr.Markdown("## 🎧 Téléchargement de podcasts Radio France")
    with gr.Row():
        url_input = gr.Textbox(
            label="URL de la série podcast",
            placeholder="Ex: https://www.radiofrance.fr/.../mon-podcast",
            max_lines=1
        )
    btn = gr.Button("Télécharger les épisodes", variant="primary")
    output = gr.File(label="Fichier ZIP contenant les MP3")
    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
        ]],
        inputs=[url_input],
        label="Exemple fonctionnel"
    )
    # Wire the button to the download callback; also exposed as the
    # "download" API endpoint.
    btn.click(
        fn=download_podcast,
        inputs=url_input,
        outputs=output,
        api_name="download"
    )

# Start the server locally (no public share link) and surface callback errors
# in the UI. The stray trailing "|" after this call was a syntax error.
app.launch(show_error=True, share=False)