File size: 3,680 Bytes
eeae908
734bffb
c77e282
734bffb
6ca2249
88da9f3
6ca2249
31f35d4
96682d9
88da9f3
 
64b2d1d
 
 
 
 
88da9f3
c77e282
64b2d1d
734bffb
52a320c
 
64b2d1d
 
 
 
fea2cb5
 
 
64b2d1d
 
 
2a11a1f
64b2d1d
31f35d4
64b2d1d
 
 
2a11a1f
64b2d1d
52a320c
64b2d1d
 
 
 
 
 
 
 
734bffb
64b2d1d
 
 
 
88da9f3
 
0a6bdb5
52a320c
6ca2249
64b2d1d
 
0a6bdb5
64b2d1d
0a6bdb5
64b2d1d
0a6bdb5
2a11a1f
64b2d1d
fea2cb5
88da9f3
734bffb
88da9f3
64b2d1d
734bffb
31f35d4
0a6bdb5
88da9f3
6ca2249
734bffb
0a6bdb5
734bffb
88da9f3
 
 
 
 
734bffb
fea2cb5
64b2d1d
09d051e
64b2d1d
 
 
 
 
 
 
09d051e
 
 
 
 
64b2d1d
09d051e
2a11a1f
64b2d1d
 
 
 
 
 
11fd592
fea2cb5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr
import requests
import re
import os
import zipfile
import tempfile
from urllib.parse import urljoin
from bs4 import BeautifulSoup

# (connect, read) timeout in seconds for every HTTP request. requests waits
# indefinitely by default, which would freeze the UI on a stalled server.
_REQUEST_TIMEOUT = (10, 60)

# Browser-like headers sent with both the page request and the MP3 downloads.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://www.radiofrance.fr/'
}


def _find_mp3_links(html, page_url):
    """Return the de-duplicated MP3 URLs found in *html*, in discovery order.

    Three strategies are tried in turn; a later one only runs when the
    previous ones found nothing. Relative URLs are resolved against
    *page_url* and query strings are dropped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    links = []

    # 1) "contentUrl" entries inside JSON-LD <script> blocks.
    for script in soup.find_all('script', type='application/ld+json'):
        if not script.string:
            continue
        for match in re.findall(r'"contentUrl"\s*:\s*"([^"]+?\.mp3)', script.string):
            # The match is raw JSON text, where "/" may be escaped as "\/".
            unescaped = match.replace('\\/', '/')
            links.append(urljoin(page_url, unescaped.split('?')[0]))

    # 2) Fallback: elements carrying a data-url attribute that mentions .mp3.
    if not links:
        for tag in soup.find_all(attrs={"data-url": re.compile(r"\.mp3")}):
            links.append(urljoin(page_url, tag['data-url'].split('?')[0]))

    # 3) Last resort: any absolute .mp3 URL in the raw page, restricted to
    #    those containing "podcast".
    if not links:
        for match in re.findall(r'(https?://[^\s"\']+?\.mp3)', html):
            candidate = urljoin(page_url, match.split('?')[0])
            if 'podcast' in candidate:
                links.append(candidate)

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(links))


def _download_file(file_url, dest_path):
    """Stream *file_url* to *dest_path*.

    Raises requests.RequestException or OSError on failure; a partially
    written file is removed before the exception propagates.
    """
    try:
        with requests.get(file_url, headers=_REQUEST_HEADERS, stream=True,
                          timeout=_REQUEST_TIMEOUT) as r:
            r.raise_for_status()
            with open(dest_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except (requests.RequestException, OSError):
        try:
            os.remove(dest_path)
        except OSError:
            pass  # nothing was written, or it is already gone
        raise


def process_url(url):
    """Download every MP3 episode linked from *url* and bundle them in a ZIP.

    Args:
        url: Address of the podcast page to scan.

    Returns:
        A ``(zip_path, error)`` tuple. On success ``zip_path`` is the path of
        the archive (inside a fresh temporary directory) and ``error`` is
        ``None``. On failure ``zip_path`` is ``None`` and ``error`` is a
        user-facing message.
    """
    try:
        response = requests.get(url, headers=_REQUEST_HEADERS,
                                timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur de connexion : {e}"

    mp3_links = _find_mp3_links(response.text, url)
    if not mp3_links:
        return None, "Aucun épisode trouvé - Structure de page inconnue"

    temp_dir = tempfile.mkdtemp()
    downloaded = []

    for idx, mp3_url in enumerate(mp3_links, 1):
        # The index prefix keeps page order and avoids name collisions.
        filepath = os.path.join(temp_dir, f"{idx:02d}_{os.path.basename(mp3_url)}")
        try:
            _download_file(mp3_url, filepath)
        except (requests.RequestException, OSError):
            # Best effort: one broken episode must not abort the whole batch.
            continue
        downloaded.append(filepath)

    if not downloaded:
        # Nothing to hand back, so do not leave the empty directory behind.
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass
        return None, "Échec du téléchargement"

    zip_path = os.path.join(temp_dir, 'podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for path in downloaded:
            zipf.write(path, arcname=os.path.basename(path))

    # The individual MP3s now live in the archive; free the disk space.
    for path in downloaded:
        try:
            os.remove(path)
        except OSError:
            pass

    return zip_path, None

def download_podcast(url):
    """Gradio callback: return the ZIP path for *url* or raise a UI error.

    Delegates to ``process_url`` and converts its error message, if any,
    into a ``gr.Error``.
    """
    archive, failure = process_url(url)
    if not failure:
        return archive
    raise gr.Error(failure)

# Build the Gradio interface: a URL textbox, a download button, a file
# output for the resulting ZIP, and one example URL.
with gr.Blocks() as app:
    gr.Markdown("## 🎧 Téléchargeur Radio France")
    with gr.Row():
        # Single-line input for the podcast page address.
        url_input = gr.Textbox(
            label="URL du podcast",
            placeholder="Ex: https://www.radiofrance.fr/...",
            max_lines=1
        )
    btn = gr.Button("Télécharger les épisodes", variant="primary")
    # Receives the archive path returned by download_podcast.
    output = gr.File(label="Fichier ZIP")
    
    # Example URL offered as a ready-made value for url_input.
    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
        ]],
        inputs=[url_input]
    )

    # Clicking the button runs download_podcast on the textbox value and
    # sends the result to the file output; registered with api_name "download".
    btn.click(
        fn=download_podcast,
        inputs=url_input,
        outputs=output,
        api_name="download"
    )

# Start the app as soon as the module is executed.
app.launch()