shukdevdatta123 committed
Commit c4bd9c2 · verified · 1 Parent(s): 107b272

Delete v1.txt

Files changed (1)
  1. v1.txt +0 -253
v1.txt DELETED
@@ -1,253 +0,0 @@
- import base64
- import tempfile
- import os
- import requests
- import gradio as gr
- from openai import OpenAI
-
- # Available voices for audio generation
- VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]
-
- def process_text_input(api_key, text_prompt, selected_voice):
-     """Generate audio response from text input"""
-     try:
-         # Initialize OpenAI client with the provided API key
-         client = OpenAI(api_key=api_key)
-
-         completion = client.chat.completions.create(
-             model="gpt-4o-audio-preview",
-             modalities=["text", "audio"],
-             audio={"voice": selected_voice, "format": "wav"},
-             messages=[
-                 {
-                     "role": "user",
-                     "content": text_prompt
-                 }
-             ]
-         )
-
-         # Save the audio to a temporary file
-         wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
-         temp_path = tempfile.mktemp(suffix=".wav")
-         with open(temp_path, "wb") as f:
-             f.write(wav_bytes)
-
-         # Get the text response
-         text_response = completion.choices[0].message.content
-
-         return text_response, temp_path
-     except Exception as e:
-         return f"Error: {str(e)}", None
-
- def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
-     """Process audio input and generate a response"""
-     try:
-         if not audio_path:
-             return "Please upload or record audio first.", None
-
-         # Initialize OpenAI client with the provided API key
-         client = OpenAI(api_key=api_key)
-
-         # Read audio file and encode to base64
-         with open(audio_path, "rb") as audio_file:
-             audio_data = audio_file.read()
-             encoded_audio = base64.b64encode(audio_data).decode('utf-8')
-
-         # Create message content with both text and audio
-         message_content = []
-
-         if text_prompt:
-             message_content.append({
-                 "type": "text",
-                 "text": text_prompt
-             })
-
-         message_content.append({
-             "type": "input_audio",
-             "input_audio": {
-                 "data": encoded_audio,
-                 "format": "wav"
-             }
-         })
-
-         # Call OpenAI API
-         completion = client.chat.completions.create(
-             model="gpt-4o-audio-preview",
-             modalities=["text", "audio"],
-             audio={"voice": selected_voice, "format": "wav"},
-             messages=[
-                 {
-                     "role": "user",
-                     "content": message_content
-                 }
-             ]
-         )
-
-         # Save the audio response
-         wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
-         temp_path = tempfile.mktemp(suffix=".wav")
-         with open(temp_path, "wb") as f:
-             f.write(wav_bytes)
-
-         # Get the text response
-         text_response = completion.choices[0].message.content
-
-         return text_response, temp_path
-     except Exception as e:
-         return f"Error: {str(e)}", None
-
- def download_example_audio():
-     """Download an example audio file for testing"""
-     try:
-         url = "https://cdn.openai.com/API/docs/audio/alloy.wav"
-         response = requests.get(url)
-         response.raise_for_status()
-
-         # Save to a temporary file
-         temp_path = tempfile.mktemp(suffix=".wav")
-         with open(temp_path, "wb") as f:
-             f.write(response.content)
-
-         return temp_path
-     except Exception as e:
-         return None
-
- def use_example_audio():
-     """Load example audio for the interface"""
-     audio_path = download_example_audio()
-     return audio_path
-
- # Create Gradio Interface
- with gr.Blocks(title="OpenAI Audio Chat App") as app:
-     gr.Markdown("# OpenAI Audio Chat App")
-     gr.Markdown("Interact with GPT-4o audio model through text and audio inputs")
-
-     # API Key input (used across all tabs)
-     api_key = gr.Textbox(
-         label="OpenAI API Key",
-         placeholder="Enter your OpenAI API key here",
-         type="password"
-     )
-
-     with gr.Tab("Text to Audio"):
-         with gr.Row():
-             with gr.Column():
-                 text_input = gr.Textbox(
-                     label="Text Prompt",
-                     placeholder="Enter your question or prompt here...",
-                     lines=3
-                 )
-                 text_voice = gr.Dropdown(
-                     choices=VOICES,
-                     value="alloy",
-                     label="Voice"
-                 )
-                 text_submit = gr.Button("Generate Response")
-
-             with gr.Column():
-                 text_output = gr.Textbox(label="AI Response (Text)", lines=5)
-                 audio_output = gr.Audio(label="AI Response (Audio)")
-
-         text_submit.click(
-             fn=process_text_input,
-             inputs=[api_key, text_input, text_voice],
-             outputs=[text_output, audio_output]
-         )
-
-     with gr.Tab("Audio Input to Audio Response"):
-         with gr.Row():
-             with gr.Column():
-                 audio_input = gr.Audio(
-                     label="Audio Input",
-                     type="filepath",
-                     sources=["microphone", "upload"]
-                 )
-                 example_btn = gr.Button("Use Example Audio")
-
-                 accompanying_text = gr.Textbox(
-                     label="Accompanying Text (Optional)",
-                     placeholder="Add any text context or question about the audio...",
-                     lines=2
-                 )
-                 audio_voice = gr.Dropdown(
-                     choices=VOICES,
-                     value="alloy",
-                     label="Response Voice"
-                 )
-                 audio_submit = gr.Button("Process Audio & Generate Response")
-
-             with gr.Column():
-                 audio_text_output = gr.Textbox(label="AI Response (Text)", lines=5)
-                 audio_audio_output = gr.Audio(label="AI Response (Audio)")
-
-         audio_submit.click(
-             fn=process_audio_input,
-             inputs=[api_key, audio_input, accompanying_text, audio_voice],
-             outputs=[audio_text_output, audio_audio_output]
-         )
-
-         example_btn.click(
-             fn=use_example_audio,
-             inputs=[],
-             outputs=[audio_input]
-         )
-
-     with gr.Tab("Voice Samples"):
-         gr.Markdown("## Listen to samples of each voice")
-
-         def generate_voice_sample(api_key, voice_type):
-             try:
-                 if not api_key:
-                     return "Please enter your OpenAI API key first.", None
-
-                 client = OpenAI(api_key=api_key)
-                 completion = client.chat.completions.create(
-                     model="gpt-4o-audio-preview",
-                     modalities=["text", "audio"],
-                     audio={"voice": voice_type, "format": "wav"},
-                     messages=[
-                         {
-                             "role": "user",
-                             "content": f"This is a sample of the {voice_type} voice. It has its own unique tone and character."
-                         }
-                     ]
-                 )
-
-                 # Save the audio to a temporary file
-                 wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
-                 temp_path = tempfile.mktemp(suffix=".wav")
-                 with open(temp_path, "wb") as f:
-                     f.write(wav_bytes)
-
-                 return f"Sample generated with voice: {voice_type}", temp_path
-             except Exception as e:
-                 return f"Error: {str(e)}", None
-
-         with gr.Row():
-             sample_voice = gr.Dropdown(
-                 choices=VOICES,
-                 value="alloy",
-                 label="Select Voice Sample"
-             )
-             sample_btn = gr.Button("Generate Sample")
-
-         with gr.Row():
-             sample_text = gr.Textbox(label="Status")
-             sample_audio = gr.Audio(label="Voice Sample")
-
-         sample_btn.click(
-             fn=generate_voice_sample,
-             inputs=[api_key, sample_voice],
-             outputs=[sample_text, sample_audio]
-         )
-
-     gr.Markdown("""
-     ## Notes:
-     - You must provide your OpenAI API key in the field above
-     - The model used is `gpt-4o-audio-preview`
-     - Audio inputs should be in WAV format
-     - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
-     """)
-
- if __name__ == "__main__":
-     app.launch()
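
For reference, the round trip the deleted file was built around is sketched below: one gpt-4o-audio-preview chat-completions call that returns both a transcript and base64-encoded WAV audio. This is a minimal sketch against the current openai Python SDK, not part of this commit; it assumes OPENAI_API_KEY is set in the environment, reads the spoken text from message.audio.transcript (message.content can be empty when audio output is requested), and swaps the deleted code's deprecated tempfile.mktemp for tempfile.NamedTemporaryFile(delete=False).

import base64
import tempfile
from openai import OpenAI

# Sketch only: assumes OPENAI_API_KEY is set in the environment.
client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)

message = completion.choices[0].message
# With audio output requested, the text lives on message.audio.transcript;
# message.content may be None.
print(message.audio.transcript)

# NamedTemporaryFile(delete=False) avoids the filename race that led to
# tempfile.mktemp being deprecated.
wav_bytes = base64.b64decode(message.audio.data)
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
    f.write(wav_bytes)
    print("audio saved to", f.name)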