Ribot committed on
Commit
11fd592
·
verified ·
1 Parent(s): a65fa18

Create app.py

Browse files

Takes the source code of a podcast page and extracts its MP3 files into a zip archive

Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import sys
4
+ import subprocess
5
+ import importlib.util
6
+ import shutil
7
+ import tempfile
8
+
9
+ # === INSTALLATION AUTOMATIQUE DES DÉPENDANCES ===
10
def install_if_missing(package_name, import_name=None):
    """Install *package_name* with pip when its module cannot be found.

    Args:
        package_name: Name handed to ``pip install``.
        import_name: Module name probed with ``importlib.util.find_spec``;
            falls back to ``package_name`` when omitted or empty.

    Raises:
        subprocess.CalledProcessError: If the pip invocation exits non-zero.
    """
    module_name = import_name if import_name else package_name

    # Nothing to do when the module is already importable.
    if importlib.util.find_spec(module_name) is not None:
        return

    print(f"Installation de {package_name}...")
    # Run pip through the current interpreter so the package lands in the
    # same environment this script is executing in.
    pip_command = [sys.executable, "-m", "pip", "install", package_name]
    subprocess.check_call(pip_command)
15
+
16
# Each entry is (pip package name[, importable module name]); the tuple is
# unpacked straight into install_if_missing's positional parameters.
for package in (("requests",), ("bs4", "bs4"), ("gradio",)):
    install_if_missing(*package)
22
+
23
+ # === IMPORTS ===
24
+ import requests
25
+ import gradio as gr
26
+ from bs4 import BeautifulSoup
27
+
28
+ # === UTILITAIRES ===
29
def slugify(text, max_length=50):
    """Turn *text* into a lower-case, underscore-separated slug.

    Characters other than word characters, whitespace and hyphens are
    dropped; every run of hyphens and/or whitespace becomes a single
    underscore. The result is cut to *max_length* characters and then
    stripped of leading and trailing underscores.

    Args:
        text: The string to convert.
        max_length: Maximum slug length before underscores are stripped.

    Returns:
        The slug, which may be empty.
    """
    cleaned = re.sub(r'[^\w\s-]', '', text.lower())
    joined = re.sub(r'[-\s]+', '_', cleaned)
    # Truncate first, then strip, so a cut that lands on a separator does
    # not leave a dangling underscore.
    truncated = joined[:max_length]
    return truncated.strip('_')
34
+
35
def extract_mp3_links_and_title(url):
    """Fetch a podcast page and return its slugified title and MP3 links.

    The title comes from the first ``<h1>`` (or ``<title>`` when there is
    no ``<h1>``) and is passed through ``slugify``; ``"podcast"`` is used
    when no title tag exists or the slug comes out empty.

    Args:
        url: Address of the page to scan.

    Returns:
        A ``(podcast_title, mp3_links)`` tuple. ``mp3_links`` holds absolute
        URLs, in page order, without duplicates.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import urljoin, urlparse

    # A timeout prevents a stalled server from hanging the request forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Podcast title
    title_tag = soup.find('h1') or soup.find('title')
    podcast_title = slugify(title_tag.get_text()) if title_tag else ""
    if not podcast_title:
        # Covers both a missing tag and a title made only of punctuation.
        podcast_title = "podcast"

    # MP3 links
    mp3_links = []
    for anchor in soup.find_all('a', href=True):
        # Resolve relative hrefs against the final (post-redirect) page URL
        # so every returned link can be requested directly.
        absolute_url = urljoin(response.url, anchor['href'].strip())
        # Test the path component only, case-insensitively, so links such as
        # "episode.MP3" or "episode.mp3?token=abc" are still recognised.
        if urlparse(absolute_url).path.lower().endswith('.mp3'):
            mp3_links.append(absolute_url)

    # Drop repeated links while preserving first-seen order.
    mp3_links = list(dict.fromkeys(mp3_links))

    return podcast_title, mp3_links
48
+
49
def download_and_zip_podcast(url):
    """Download every MP3 linked from *url* and bundle them into a zip file.

    Args:
        url: Address of the podcast page to scan.

    Returns:
        A ``(message, zip_path)`` tuple. ``zip_path`` is the path of the
        created archive, or ``None`` when no MP3 was found or an error
        occurred; ``message`` is the user-facing status text.
    """
    try:
        podcast_title, mp3_links = extract_mp3_links_and_title(url)
        if not mp3_links:
            return "Aucun fichier MP3 trouvé.", None

        # Guard against an empty title so file names never start with "_"
        # and the archive is never named just ".zip".
        podcast_title = podcast_title or "podcast"

        # Keep the archive in its own directory: writing it inside the
        # directory being archived can make the archive include itself.
        output_dir = tempfile.mkdtemp()
        download_dir = tempfile.mkdtemp()
        try:
            for i, mp3_url in enumerate(mp3_links, start=1):
                filename = f"{podcast_title}_{i:02}.mp3"
                filepath = os.path.join(download_dir, filename)
                # Stream to disk in chunks; the timeout keeps a stalled
                # server from blocking the request indefinitely.
                with requests.get(mp3_url, stream=True, timeout=30) as r:
                    r.raise_for_status()
                    with open(filepath, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=8192):
                            f.write(chunk)

            # make_archive appends ".zip" itself and returns the final path,
            # so no string surgery on the file name is needed.
            archive_base = os.path.join(output_dir, podcast_title)
            zip_path = shutil.make_archive(archive_base, 'zip', download_dir)
        except Exception:
            # Nothing usable was produced: do not leave the output dir behind.
            shutil.rmtree(output_dir, ignore_errors=True)
            raise
        finally:
            # The raw MP3 files are only needed until the archive exists.
            shutil.rmtree(download_dir, ignore_errors=True)

        return f"{len(mp3_links)} fichiers téléchargés avec succès.", zip_path

    except Exception as e:
        # UI boundary: report the failure as a message instead of raising.
        return f"Erreur : {str(e)}", None
72
+
73
+ # === INTERFACE GRADIO ===
74
with gr.Blocks() as app:
    gr.Markdown("# Téléchargeur de Podcasts MP3")

    # NOTE(review): indentation was lost in the source view; the input box
    # and button are assumed to be the only children of the row — confirm.
    with gr.Row():
        url_input = gr.Textbox(
            label="URL de la page série",
            placeholder="https://www.radiofrance.fr/...",
        )
        download_button = gr.Button("Télécharger et compresser")

    output_text = gr.Textbox(label="Message")
    file_output = gr.File(label="Fichier ZIP", file_types=[".zip"])

    def process(url):
        """Run the download for *url* and return (message, zip file path)."""
        return download_and_zip_podcast(url)

    # Wire the button: one input (the URL) feeds two outputs, the status
    # message and the downloadable archive.
    download_button.click(
        fn=process,
        inputs=[url_input],
        outputs=[output_text, file_output],
    )
87
+
88
+ # === LANCEMENT LOCAL OU SUR HUGGINGFACE ===
89
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script.
    # NOTE(review): the original comment says share=True is useful for
    # Hugging Face — confirm against the Gradio launch() documentation.
    app.launch(share=True)