"""Gradio front-end that converts text to speech through the Krutrim TTS API."""

import base64
import io
import os
import tempfile
import wave
from pathlib import Path

import gradio as gr
import numpy as np
import requests
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Maximum allowed input text length
MAX_TEXT_LENGTH = 4000

# Endpoint the generation request is posted to
API_URL = "https://cloud.olakrutrim.com/v1/audio/generations/krutrim-tts"

# Upper bound (seconds) on the HTTP call so a stalled API cannot hang the UI
REQUEST_TIMEOUT_SECONDS = 60

# Language mapping with display names
LANGUAGES = {
    "en": "English",
    "hi": "Hindi",
    "bn": "Bengali",
    "ta": "Tamil",
    "te": "Telugu",
    "ml": "Malayalam",
    "mr": "Marathi",
    "gu": "Gujarati",
    "kn": "Kannada",
}

# PCM sample width in bytes -> numpy dtype. WAV stores 8-bit samples as
# unsigned and wider samples as little-endian signed integers.
_SAMPLE_DTYPES = {1: "u1", 2: "<i2", 4: "<i4"}


def _decode_wav(audio_data):
    """Parse WAV bytes into ``(sample_rate, samples)``.

    The header is read with the ``wave`` module so that the sample rate,
    channel count and sample width come from the data itself and the header
    bytes are not mistaken for audio samples.

    Raises:
        wave.Error: if the bytes are not a WAV container ``wave`` can read.
        EOFError: if the data is truncated.
        ValueError: if the sample width has no matching integer dtype or the
            frame data does not divide evenly into samples/channels.
    """
    with wave.open(io.BytesIO(audio_data), "rb") as wav_file:
        sample_rate = wav_file.getframerate()
        channels = wav_file.getnchannels()
        sample_width = wav_file.getsampwidth()
        frames = wav_file.readframes(wav_file.getnframes())

    dtype = _SAMPLE_DTYPES.get(sample_width)
    if dtype is None:
        raise ValueError(f"unsupported sample width: {sample_width} bytes")

    samples = np.frombuffer(frames, dtype=dtype)
    if channels > 1:
        # Frames are interleaved; expose them as (n_frames, n_channels).
        samples = samples.reshape(-1, channels)
    return sample_rate, samples


def generate_audio(input_text, input_language, input_speaker):
    """
    Generate audio from text using Krutrim TTS API

    Args:
        input_text: Text to synthesize; must be non-empty and at most
            ``MAX_TEXT_LENGTH`` characters.
        input_language: Language code sent to the API as ``input_language``.
        input_speaker: Speaker name sent to the API as ``input_speaker``.

    Returns:
        A ``(audio, message)`` pair. ``audio`` is ``(sample_rate, samples)``
        on success, the path of the saved WAV file when the bytes could not
        be decoded for playback, or ``None`` on error. ``message`` is a
        status string for display.
    """
    if not input_text or not input_text.strip():
        return None, "Error: Please enter some text to convert to speech."

    # Check if text exceeds the character limit
    if len(input_text) > MAX_TEXT_LENGTH:
        return None, f"Error: Input text exceeds the maximum limit of {MAX_TEXT_LENGTH} characters. Your text has {len(input_text)} characters."

    # Get API key from environment variable
    api_key = os.getenv("KRUTRIM_API_KEY")
    if not api_key:
        return None, "Error: KRUTRIM_API_KEY environment variable not found. Please check your .env file."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "modelName": "tts",
        "input_text": input_text,
        "input_language": input_language,
        "input_speaker": input_speaker,
    }

    try:
        response = requests.post(
            API_URL,
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()  # Raise an error for HTTP failure codes
        audio_data = base64.b64decode(response.json()["output"])
    except requests.exceptions.RequestException as e:
        return None, f"Error: {e}"
    except (KeyError, TypeError, ValueError) as e:
        # Missing "output" field, non-object JSON body, or invalid base64.
        return None, f"Error: Unexpected response from the TTS API: {e!r}"

    # Save to a temporary file for download option
    output_path = Path(tempfile.gettempdir()) / "krutrim_output.wav"
    output_path.write_bytes(audio_data)

    # Convert to format suitable for browser playback
    try:
        sample_rate, audio_np = _decode_wav(audio_data)
    except (wave.Error, EOFError, ValueError) as e:
        # Fallback to file path if conversion fails
        return str(output_path), f"Audio generated but playback in browser might not work. You can download the file. Error: {e}"

    return (sample_rate, audio_np), "Audio generated successfully! Click the play button to listen."


# Create Gradio interface
with gr.Blocks(title="Krutrim Text-to-Speech") as demo:
    gr.Markdown("# Krutrim Text-to-Speech Generator")
    gr.Markdown("Enter your text below and get it converted to speech using Krutrim's TTS API.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text to convert to speech",
                placeholder=f"Type your text here (maximum {MAX_TEXT_LENGTH} characters)...",
                lines=5,
            )
            # Show the human-readable name; the language code is still the
            # value passed to generate_audio.
            language = gr.Dropdown(
                label="Language",
                choices=[(name, code) for code, name in LANGUAGES.items()],
                value="en",
            )
            speaker = gr.Dropdown(
                label="Speaker",
                choices=["male", "female"],
                value="male",
            )
            submit_btn = gr.Button("Generate Audio")

        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Audio",
                type="numpy",  # Explicitly set to numpy for better browser compatibility
                autoplay=True,  # Auto play the audio when generated
                show_download_button=True,  # Show the download button for the audio
                waveform_options={"waveform_color": "blue", "waveform_progress_color": "red"},  # Customize waveform appearance
                format="wav",  # Specify output format
            )
            output_message = gr.Textbox(label="Status")

    submit_btn.click(
        fn=generate_audio,
        inputs=[text_input, language, speaker],
        outputs=[audio_output, output_message],
    )

if __name__ == "__main__":
    demo.launch()