Spaces:
Running
Running
Upload 2 files
Browse files- app.py +123 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
import base64
|
4 |
+
import os
|
5 |
+
from pathlib import Path
|
6 |
+
import tempfile
|
7 |
+
import numpy as np
|
8 |
+
import io
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
|
11 |
+
# Load environment variables from .env file.
# generate_audio() later reads KRUTRIM_API_KEY via os.getenv(), so this must
# run before the first request is handled.
load_dotenv()
|
13 |
+
|
14 |
+
# Upper bound on the number of characters accepted in a single request
MAX_TEXT_LENGTH = 4_000

# Language codes offered in the UI, paired with their display names
LANGUAGES = dict(
    en="English",
    hi="Hindi",
    bn="Bengali",
    ta="Tamil",
    te="Telugu",
    ml="Malayalam",
    mr="Marathi",
    gu="Gujarati",
    kn="Kannada",
)
|
29 |
+
|
30 |
+
def _decode_audio_for_playback(audio_data):
    """Convert raw audio bytes into the ``(sample_rate, samples)`` pair Gradio plays.

    When the payload is a RIFF/WAV container, the header is parsed so that the
    real sample rate, channel count and sample width are used and the header
    bytes are not misread as audio samples. Payloads without a RIFF header are
    treated as headerless 16-bit PCM at 16 kHz, which is how this app has
    always interpreted the API output.

    Args:
        audio_data: Decoded (binary) audio payload.

    Returns:
        Tuple ``(sample_rate, samples)`` where ``samples`` is a NumPy array,
        shaped ``(frames, channels)`` for multi-channel audio.

    Raises:
        ValueError: The payload cannot be decoded (malformed WAV, unsupported
            sample width, or a buffer that is not a whole number of samples).
    """
    import io
    import wave

    if audio_data[:4] != b"RIFF":
        # assumes headerless 16-bit PCM at 16 kHz — TODO confirm against the API docs
        return 16000, np.frombuffer(audio_data, dtype=np.int16)

    try:
        with wave.open(io.BytesIO(audio_data), "rb") as wav_file:
            sample_rate = wav_file.getframerate()
            channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            frames = wav_file.readframes(wav_file.getnframes())
    except (wave.Error, EOFError) as err:
        raise ValueError(f"invalid WAV data: {err}") from err

    # WAV PCM is little-endian; 8-bit samples are unsigned.
    dtype_by_width = {1: np.dtype("u1"), 2: np.dtype("<i2"), 4: np.dtype("<i4")}
    dtype = dtype_by_width.get(sample_width)
    if dtype is None:
        raise ValueError(f"unsupported sample width: {sample_width} bytes")

    samples = np.frombuffer(frames, dtype=dtype)
    if channels > 1:
        samples = samples.reshape(-1, channels)
    return sample_rate, samples


def generate_audio(input_text, input_language, input_speaker):
    """
    Generate audio from text using Krutrim TTS API

    Args:
        input_text: Text to synthesise; must be non-blank and at most
            MAX_TEXT_LENGTH characters.
        input_language: Language code forwarded to the API as ``input_language``.
        input_speaker: Speaker name forwarded to the API as ``input_speaker``.

    Returns:
        A 2-tuple ``(audio, status_message)``. ``audio`` is
        ``(sample_rate, numpy_array)`` on success, a path to a temporary WAV
        file when the bytes could not be decoded for in-browser playback, or
        ``None`` on any error. Errors are reported through ``status_message``
        rather than raised so the UI can display them.
    """
    # Reject blank input up front: avoids a pointless API call and a TypeError
    # from len(None).
    if not input_text or not input_text.strip():
        return None, "Error: Please enter some text to convert to speech."

    # Check if text exceeds the character limit
    if len(input_text) > MAX_TEXT_LENGTH:
        return None, f"Error: Input text exceeds the maximum limit of {MAX_TEXT_LENGTH} characters. Your text has {len(input_text)} characters."

    # Get API key from environment variable
    api_key = os.getenv("KRUTRIM_API_KEY")
    if not api_key:
        return None, "Error: KRUTRIM_API_KEY environment variable not found. Please check your .env file."

    url = "https://cloud.olakrutrim.com/v1/audio/generations/krutrim-tts"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "modelName": "tts",
        "input_text": input_text,
        "input_language": input_language,
        "input_speaker": input_speaker,
    }

    try:
        # (connect, read) timeouts: without them a stalled server would block
        # this handler indefinitely.
        response = requests.post(url, json=payload, headers=headers, timeout=(10, 120))
        response.raise_for_status()  # Raise an error for HTTP failure codes
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older versions of requests.
        return None, f"Error: {e}"

    try:
        audio_data = base64.b64decode(result["output"])
    except (KeyError, TypeError, ValueError) as e:
        # Missing "output" field, non-dict body, or invalid base64.
        return None, f"Error: Unexpected response from the TTS API: {e!r}"

    try:
        sample_rate, samples = _decode_audio_for_playback(audio_data)
    except ValueError as e:
        # Fallback to a file path if conversion fails. A unique file per
        # request keeps concurrent users from overwriting each other's audio.
        with tempfile.NamedTemporaryFile(
            prefix="krutrim_output_", suffix=".wav", delete=False
        ) as f:
            f.write(audio_data)
            output_path = Path(f.name)
        return str(output_path), f"Audio generated but playback in browser might not work. You can download the file. Error: {e}"

    return (sample_rate, samples), "Audio generated successfully! Click the play button to listen."
|
86 |
+
|
87 |
+
# Create Gradio interface: inputs on the left, generated audio and status on the right.
with gr.Blocks(title="Krutrim Text-to-Speech") as demo:
    gr.Markdown("# Krutrim Text-to-Speech Generator")
    gr.Markdown("Enter your text below and get it converted to speech using Krutrim's TTS API.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text to convert to speech",
                # Derived from MAX_TEXT_LENGTH so the hint cannot drift from
                # the limit that generate_audio() enforces.
                placeholder=f"Type your text here (maximum {MAX_TEXT_LENGTH} characters)...",
                lines=5,
            )
            language = gr.Dropdown(
                label="Language",
                choices=list(LANGUAGES.keys()),
                value="en",
            )
            speaker = gr.Dropdown(
                label="Speaker",
                choices=["male", "female"],
                value="male",
            )
            submit_btn = gr.Button("Generate Audio")

        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Audio",
                type="numpy",
                autoplay=True,  # Auto play the audio when generated
                show_download_button=True,  # Show the download button for the audio
                waveform_options={"waveform_color": "blue", "waveform_progress_color": "red"},  # Customize waveform appearance
                format="wav",  # Specify output format
            )
            output_message = gr.Textbox(label="Status")

    # generate_audio returns (audio, status message), matching the two outputs in order.
    submit_btn.click(
        fn=generate_audio,
        inputs=[text_input, language, speaker],
        outputs=[audio_output, output_message],
    )

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio>=3.50.2
|
2 |
+
requests>=2.31.0
|
3 |
+
python-dotenv>=1.0.0
|
4 |
+
numpy>=1.24.0
|