# NOTE: web file-viewer residue (file-size banner, per-line commit hashes and
# the line-number gutter) was removed here; it was never part of the program.
import gradio as gr
import requests
import re
import os
import zipfile
import tempfile
from urllib.parse import urljoin
from bs4 import BeautifulSoup
# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a stalled server and the UI handler never returns.
_REQUEST_TIMEOUT = (10, 60)


def _extract_mp3_links(page_url, html):
    """Return the unique MP3 URLs found in *html*, in discovery order."""
    found = {}  # dict keys give ordered de-duplication in O(1) per link
    soup = BeautifulSoup(html, 'html.parser')
    for script in soup.find_all('script', type='application/ld+json'):
        if not script.string:
            continue
        for match in re.findall(r'"contentUrl"\s*:\s*"([^"]+?\.mp3)', script.string):
            # JSON serializers may escape "/" as "\/"; undo that before
            # resolving, otherwise the backslashes end up in the URL.
            cleaned = match.replace('\\/', '/').split('?')[0]
            found.setdefault(urljoin(page_url, cleaned), None)
    if not found:
        # Fallback: scan the raw page for direct links to the media host.
        found = dict.fromkeys(
            re.findall(r'(https?://media\.radiofrance-podcast\.net[^\s"\']+?\.mp3)', html)
        )
    return list(found)


def _limit_episodes(links, num_episodes):
    """Keep the first *num_episodes* links; keep all if the count is <= 0 or invalid."""
    try:
        limit = int(num_episodes)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric text, NaN or infinity: fall back to "all episodes".
        return links
    return links[:limit] if limit > 0 else links


def _download_episode(mp3_url, filepath, headers):
    """Stream *mp3_url* into *filepath*; return True on success, False otherwise."""
    try:
        with requests.get(mp3_url, headers=headers, stream=True,
                          timeout=_REQUEST_TIMEOUT) as r:
            r.raise_for_status()
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except (requests.RequestException, OSError):
        # Best effort: skip this episode, but never leave a truncated file
        # behind where it could be mistaken for a complete download.
        try:
            os.remove(filepath)
        except OSError:
            pass
        return False
    return True


def process_url(url, num_episodes):
    """Download the episodes linked from a podcast page and bundle them in a ZIP.

    Args:
        url: Address of the podcast page to scan for MP3 links.
        num_episodes: Maximum number of episodes to keep. Zero, a negative
            number, or a value that cannot be converted to an int means
            "all episodes".

    Returns:
        A ``(zip_path, error)`` tuple. On success ``zip_path`` is the path of
        the archive inside a fresh temporary directory and ``error`` is None.
        On failure ``zip_path`` is None and ``error`` is a user-facing message.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'https://www.radiofrance.fr/'
    }
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur : {str(e)}"

    mp3_links = _limit_episodes(_extract_mp3_links(url, response.text), num_episodes)
    if not mp3_links:
        return None, "Aucun épisode trouvé"

    # mkdtemp() never cleans up after itself; the caller of this function owns
    # the directory holding the returned ZIP.
    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(mp3_links, 1):
        # The numeric prefix keeps names unique and preserves episode order.
        filepath = os.path.join(temp_dir, f"{idx:02d}_{os.path.basename(mp3_url)}")
        if _download_episode(mp3_url, filepath, headers):
            filenames.append(filepath)

    if not filenames:
        # Nothing was saved, so the directory is empty: do not leak it.
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass
        return None, "Échec du téléchargement"

    zip_path = os.path.join(temp_dir, 'podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in filenames:
            zipf.write(file, arcname=os.path.basename(file))

    # The MP3s now live in the archive; drop the loose copies so each request
    # does not keep twice the data on disk.
    for file in filenames:
        try:
            os.remove(file)
        except OSError:
            pass
    return zip_path, None
def download_podcast(url, num_episodes):
    """Gradio callback: build the podcast ZIP and return its path.

    Delegates to ``process_url`` and converts its error message, if any,
    into a ``gr.Error`` so the message is surfaced in the interface.
    """
    archive, failure = process_url(url, num_episodes)
    if not failure:
        return archive
    raise gr.Error(failure)
# Gradio interface: a URL field and an episode count feed download_podcast,
# whose resulting ZIP path is shown in a file component.
# NOTE(review): the original indentation was lost; the nesting below (both
# inputs inside the Row, everything else directly under Blocks) is a
# reconstruction - confirm against the intended layout.
with gr.Blocks() as app:
    gr.Markdown("## 🎧 Téléchargeur Radio France - Contrôle des épisodes")

    with gr.Row():
        url_input = gr.Textbox(
            label="URL du podcast",
            placeholder="Ex: https://www.radiofrance.fr/...",
            max_lines=1
        )
        num_input = gr.Number(
            label="Nombre d'épisodes (0 = tous)",
            value=0,
            minimum=0,
            step=1,
            precision=0
        )

    btn = gr.Button("Télécharger", variant="primary")
    output = gr.File(label="Fichier ZIP résultant")

    # Clickable sample row that pre-fills both inputs.
    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-la-nuit-du-merveilleux-scientifique",
            11
        ]],
        inputs=[url_input, num_input]
    )

    # api_name also exposes the handler under the "download" API endpoint.
    btn.click(
        fn=download_podcast,
        inputs=[url_input, num_input],
        outputs=output,
        api_name="download"
    )

app.launch()