bharatcoder committed on
Commit
8aa8920
·
verified ·
1 Parent(s): b79a760

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +123 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import base64
4
+ import os
5
+ from pathlib import Path
6
+ import tempfile
7
+ import numpy as np
8
+ import io
9
+ from dotenv import load_dotenv
10
+
11
+ # Load environment variables from .env file
12
+ load_dotenv()
13
+
14
# Upper bound on the number of characters accepted per request.
MAX_TEXT_LENGTH = 4_000

# Supported language codes mapped to their human-readable names.
# Insertion order is the order in which the choices are listed.
LANGUAGES = {
    "en": "English",
    "hi": "Hindi",
    "bn": "Bengali",
    "ta": "Tamil",
    "te": "Telugu",
    "ml": "Malayalam",
    "mr": "Marathi",
    "gu": "Gujarati",
    "kn": "Kannada",
}
29
+
30
def generate_audio(input_text, input_language, input_speaker):
    """
    Generate audio from text using Krutrim TTS API.

    Args:
        input_text: Text to synthesise. Rejected when empty/whitespace-only
            or longer than MAX_TEXT_LENGTH characters.
        input_language: Language code, forwarded as "input_language".
        input_speaker: Speaker name, forwarded as "input_speaker".

    Returns:
        A ``(audio, message)`` pair. ``audio`` is ``(sample_rate, samples)``
        (``samples`` being a NumPy array) on success, the path of a saved
        ``.wav`` file when the payload could not be decoded for in-browser
        playback, or ``None`` on error. ``message`` is a status string for
        the user. Errors are reported through ``message``, never raised.
    """
    # Local import: ``wave`` is not among this module's top-level imports.
    import wave

    # Reject missing/blank text up front instead of spending an API call on it.
    if not input_text or not input_text.strip():
        return None, "Error: Please enter some text to convert to speech."

    # Check if text exceeds the character limit
    if len(input_text) > MAX_TEXT_LENGTH:
        return None, f"Error: Input text exceeds the maximum limit of {MAX_TEXT_LENGTH} characters. Your text has {len(input_text)} characters."

    # Get API key from environment variable
    api_key = os.getenv("KRUTRIM_API_KEY")
    if not api_key:
        return None, "Error: KRUTRIM_API_KEY environment variable not found. Please check your .env file."

    url = "https://cloud.olakrutrim.com/v1/audio/generations/krutrim-tts"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "modelName": "tts",
        "input_text": input_text,
        "input_language": input_language,
        "input_speaker": input_speaker,
    }

    try:
        # (connect, read) timeouts: without them a stalled connection would
        # block this handler indefinitely.
        response = requests.post(url, json=payload, headers=headers, timeout=(10, 120))
        response.raise_for_status()  # Raise an error for HTTP failure codes
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        return None, f"Error: {e}"

    try:
        audio_data = base64.b64decode(result["output"])
    except (KeyError, TypeError, ValueError) as e:
        # Missing/ill-typed "output" field or invalid base64 content.
        return None, f"Error: Unexpected response from the TTS API ({e!r})."

    try:
        if audio_data[:4] == b"RIFF":
            # Proper WAV container: read the real sample rate and only the
            # sample frames, so the header is not played back as noise.
            with wave.open(io.BytesIO(audio_data), "rb") as wav_file:
                sample_rate = wav_file.getframerate()
                channels = wav_file.getnchannels()
                sample_width = wav_file.getsampwidth()
                frames = wav_file.readframes(wav_file.getnframes())

            dtype = {1: np.uint8, 2: np.int16, 4: np.int32}.get(sample_width)
            if dtype is None:
                raise ValueError(f"unsupported sample width: {sample_width} bytes")

            audio_np = np.frombuffer(frames, dtype=dtype)
            if channels > 1:
                # Interleaved frames -> (samples, channels).
                audio_np = audio_np.reshape(-1, channels)
        else:
            # No RIFF header: keep the previous interpretation of the payload
            # as raw 16-bit PCM at 16 kHz.
            audio_np = np.frombuffer(audio_data, dtype=np.int16)
            sample_rate = 16000

        return (sample_rate, audio_np), "Audio generated successfully! Click the play button to listen."
    except (wave.Error, EOFError, ValueError) as e:
        # Fallback to a file path if conversion fails. The file gets a unique
        # name so concurrent requests cannot overwrite each other's audio.
        with tempfile.NamedTemporaryFile(
            prefix="krutrim_output_", suffix=".wav", delete=False
        ) as f:
            f.write(audio_data)
            output_path = f.name
        return str(output_path), f"Audio generated but playback in browser might not work. You can download the file. Error: {e}"
86
+
87
# Create Gradio interface: inputs in the first column, generated audio and
# status message in the second.
with gr.Blocks(title="Krutrim Text-to-Speech") as demo:
    gr.Markdown("# Krutrim Text-to-Speech Generator")
    gr.Markdown("Enter your text below and get it converted to speech using Krutrim's TTS API.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text to convert to speech",
                # Take the advertised limit from the constant that
                # generate_audio enforces, so the two cannot drift apart.
                placeholder=f"Type your text here (maximum {MAX_TEXT_LENGTH} characters)...",
                lines=5,
            )
            language = gr.Dropdown(
                label="Language",
                # (display name, value) pairs: the user sees "English",
                # while generate_audio still receives the code "en".
                choices=[(name, code) for code, name in LANGUAGES.items()],
                value="en",
            )
            speaker = gr.Dropdown(
                label="Speaker",
                choices=["male", "female"],
                value="male",
            )
            submit_btn = gr.Button("Generate Audio")

        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Audio",
                type="numpy",  # Explicitly set to numpy for better browser compatibility
                autoplay=True,  # Auto play the audio when generated
                show_download_button=True,  # Show the download button for the audio
                waveform_options={"waveform_color": "blue", "waveform_progress_color": "red"},  # Customize waveform appearance
                format="wav",  # Specify output format
            )
            output_message = gr.Textbox(label="Status")

    # generate_audio returns (audio, status message), matching the two outputs.
    submit_btn.click(
        fn=generate_audio,
        inputs=[text_input, language, speaker],
        outputs=[audio_output, output_message],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=3.50.2
2
+ requests>=2.31.0
3
+ python-dotenv>=1.0.0
4
+ numpy>=1.24.0