shukdevdatta123 committed on
Commit
ec333f1
·
verified ·
1 Parent(s): 7057cb9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -3
app.py CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
2
  import openai
3
  import fitz # PyMuPDF for PDF processing
4
  import base64
 
 
 
5
 
6
  # Variable to store API key
7
  api_key = ""
@@ -45,7 +48,7 @@ def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens)
45
  {"role": "user", "content": [
46
  {"type": "image_url", "image_url": {"url": image_url}}, # Corrected format
47
  {"type": "text", "text": text_query}
48
- ]}
49
  ]
50
  return query_openai(messages, temperature, top_p, max_output_tokens)
51
 
@@ -72,7 +75,7 @@ def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
72
  {"role": "user", "content": [
73
  {"type": "image_url", "image_url": {"url": image_data}}, # Fixed format
74
  {"type": "text", "text": text_query}
75
- ]}
76
  ]
77
  return query_openai(messages, temperature, top_p, max_output_tokens)
78
 
@@ -89,10 +92,33 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
89
  {"role": "user", "content": [
90
  {"type": "text", "text": text}, # Fixed format
91
  {"type": "text", "text": text_query}
92
- ]}
93
  ]
94
  return query_openai(messages, temperature, top_p, max_output_tokens)
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  # Function to clear the chat (Fix: Returns the correct number of outputs)
97
  def clear_chat():
98
  return "", "", "", "", "", "", "", None, "", None, "", 1.0, 1.0, 2048
@@ -144,6 +170,12 @@ with gr.Blocks() as demo:
144
  pdf_output = gr.Textbox(label="Response", interactive=False)
145
  pdf_button = gr.Button("Ask")
146
 
 
 
 
 
 
 
147
  # Clear chat button
148
  clear_button = gr.Button("Clear Chat")
149
 
@@ -153,6 +185,7 @@ with gr.Blocks() as demo:
153
  text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
154
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
155
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
 
156
 
157
  # Fix: Clear button resets all necessary fields correctly
158
  clear_button.click(
 
2
  import openai
3
  import fitz # PyMuPDF for PDF processing
4
  import base64
5
+ import openai
6
+ import soundfile as sf
7
+ import io
8
 
9
  # Variable to store API key
10
  api_key = ""
 
48
  {"role": "user", "content": [
49
  {"type": "image_url", "image_url": {"url": image_url}}, # Corrected format
50
  {"type": "text", "text": text_query}
51
+ ]},
52
  ]
53
  return query_openai(messages, temperature, top_p, max_output_tokens)
54
 
 
75
  {"role": "user", "content": [
76
  {"type": "image_url", "image_url": {"url": image_data}}, # Fixed format
77
  {"type": "text", "text": text_query}
78
+ ]},
79
  ]
80
  return query_openai(messages, temperature, top_p, max_output_tokens)
81
 
 
92
  {"role": "user", "content": [
93
  {"type": "text", "text": text}, # Fixed format
94
  {"type": "text", "text": text_query}
95
+ ]},
96
  ]
97
  return query_openai(messages, temperature, top_p, max_output_tokens)
98
 
99
# Function to process uploaded audio and transcribe to text
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file to text with OpenAI's Whisper API.

    The audio is decoded with ``soundfile`` and re-encoded as WAV in memory
    before being uploaded for transcription.

    Args:
        audio_file: The uploaded audio. Either a filesystem path string or an
            object exposing the path as ``.name``. ``None`` means nothing was
            uploaded.

    Returns:
        The transcript text on success. If no file was given, or if decoding
        or transcription fails, a human-readable message string is returned
        instead (no exception is raised).
    """
    if audio_file is None:
        return "Please upload an audio file."

    try:
        # Accept a plain path as well as a file wrapper carrying ``.name``.
        audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

        # Load the audio file and re-encode it as WAV in memory.
        audio_data, samplerate = sf.read(audio_path)
        audio_buffer = io.BytesIO()
        sf.write(audio_buffer, audio_data, samplerate, format='WAV')
        audio_buffer.seek(0)
        # The OpenAI client reads ``file.name`` to build the upload filename
        # and infer the audio format; a bare BytesIO has no such attribute.
        audio_buffer.name = "audio.wav"

        # Transcribe the audio using OpenAI's Whisper API
        transcript = openai.Audio.transcribe(
            model="whisper-1",
            file=audio_buffer,
        )

        return transcript["text"]

    except Exception as e:
        # Best-effort by design: the caller displays the returned string.
        return f"Error in transcription: {e}"
121
+
122
  # Function to clear the chat (Fix: Returns the correct number of outputs)
123
  def clear_chat():
124
  return "", "", "", "", "", "", "", None, "", None, "", 1.0, 1.0, 2048
 
170
  pdf_output = gr.Textbox(label="Response", interactive=False)
171
  pdf_button = gr.Button("Ask")
172
 
173
+ with gr.Tab("Voice Chat"):
174
+ audio_upload = gr.File(label="Upload an Audio File", type="file")
175
+ audio_query = gr.Textbox(label="Ask about the transcription")
176
+ audio_output = gr.Textbox(label="Response", interactive=False)
177
+ audio_button = gr.Button("Ask")
178
+
179
  # Clear chat button
180
  clear_button = gr.Button("Clear Chat")
181
 
 
185
  text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
186
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
187
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
188
+ audio_button.click(lambda audio, query, temperature, top_p, max_output_tokens: query_openai([{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio)}, {"type": "text", "text": query}]}], temperature, top_p, max_output_tokens), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)
189
 
190
  # Fix: Clear button resets all necessary fields correctly
191
  clear_button.click(