import spaces import os import tempfile import gradio as gr from dotenv import load_dotenv import torch from scipy.io.wavfile import write from diffusers import DiffusionPipeline from transformers import pipeline from pathlib import Path load_dotenv() hf_token = os.getenv("HF_TKN") device_id = 0 if torch.cuda.is_available() else -1 captioning_pipeline = pipeline( "image-to-text", model="nlpconnect/vit-gpt2-image-captioning", device=device_id ) pipe = DiffusionPipeline.from_pretrained( "cvssp/audioldm2", use_auth_token=hf_token ) @spaces.GPU(duration=120) def analyze_image_with_free_model(image_file): try: with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file: temp_file.write(image_file) temp_image_path = temp_file.name results = captioning_pipeline(temp_image_path) if not results or not isinstance(results, list): return "Error: Could not generate caption.", True caption = results[0].get("generated_text", "").strip() if not caption: return "No caption was generated.", True return caption, False except Exception as e: return f"Error analyzing image: {e}", True @spaces.GPU(duration=120) def get_audioldm_from_caption(caption): try: pipe.to("cuda") audio_output = pipe( prompt=caption, num_inference_steps=50, guidance_scale=7.5 ) pipe.to("cpu") audio = audio_output.audios[0] with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav: write(temp_wav.name, 16000, audio) return temp_wav.name except Exception as e: print(f"Error generating audio from caption: {e}") return None css = """ #header-container { text-align: center; margin: 20px 0; } #header-title { font-size: 36px; font-weight: bold; margin-bottom: 10px; } #header-subtitle { font-size: 18px; margin-bottom: 20px; color: #6c757d; } #main-container { max-width: 900px; margin: 0 auto; padding: 20px; border-radius: 12px; background-color: #f9f9f9; box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1); } button.primary-button { background-color: #007bff; color: white; border: none; padding: 10px 20px; border-radius: 5px; font-size: 16px; cursor: pointer; } button.primary-button:hover { background-color: #0056b3; } """ with gr.Blocks(css=css) as demo: with gr.Column(elem_id="header-container"): gr.HTML("""