import gradio as gr
import openai
import fitz # PyMuPDF for PDF processing
import base64
import io
import numpy as np
import soundfile as sf
# Module-level store for the user's OpenAI API key; written by set_api_key()
# and read by query_openai() / transcribe_audio(). Empty string means "unset".
api_key: str = ""
# Remembers the user-supplied OpenAI API key for subsequent requests.
def set_api_key(key):
    """Store *key* in the module-level ``api_key`` variable.

    Returns a confirmation string that is shown in the UI status box.
    """
    global api_key
    api_key = key
    return "API Key Set Successfully!"
# Sends a chat request to the GPT-4.5 preview model and returns the reply text.
def query_openai(messages, temperature, top_p, max_output_tokens):
    """Call the OpenAI ChatCompletion endpoint with *messages*.

    Parameters are the raw UI values; sliders may deliver them as strings,
    hence the explicit float()/int() coercion. Returns the assistant's reply
    text, or a human-readable error string when the key is missing or the
    request fails (the UI displays whatever comes back).
    """
    if not api_key:
        return "Please enter your OpenAI API key first."
    try:
        openai.api_key = api_key
        request = {
            "model": "gpt-4.5-preview",
            "messages": messages,
            "temperature": float(temperature),
            "top_p": float(top_p),
            "max_tokens": int(max_output_tokens),
        }
        response = openai.ChatCompletion.create(**request)
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface any failure (bad key, network, malformed response) as text.
        return f"Error: {str(e)}"
# Converts recorded or uploaded audio to text with OpenAI's Whisper model.
def transcribe_audio(audio_input):
    """Transcribe *audio_input* via the Whisper API and return the text.

    Accepts any of:
      * ``(sample_rate, samples)`` tuple — what ``gr.Audio(type="numpy")``
        actually delivers (the original ndarray check missed this and the
        tuple crashed in the bytes branch);
      * a bare ``np.ndarray`` of samples — kept for backward compatibility,
        assumed 16 kHz as before;
      * raw audio file bytes from an upload.

    Returns the transcription text, or an error string on any failure.
    """
    if not api_key:
        return "Error: No API key provided."
    openai.api_key = api_key
    try:
        samplerate = 16000  # fallback only for the bare-ndarray case
        samples = None
        if isinstance(audio_input, tuple) and len(audio_input) == 2:
            # Gradio microphone capture: use the real capture rate instead of
            # hard-coding 16 kHz, which distorted pitch/speed of the clip.
            samplerate, samples = audio_input
        elif isinstance(audio_input, np.ndarray):
            samples = audio_input
        if samples is not None:
            wav_io = io.BytesIO()
            sf.write(wav_io, samples, samplerate=int(samplerate), format="WAV")
            wav_io.seek(0)
            audio_file_obj = wav_io
            # The API infers the format from the filename attribute.
            audio_file_obj.name = "recorded_audio.wav"
        else:
            # Assume raw bytes of an uploaded audio file.
            # NOTE(review): the ".wav" name is a guess for non-WAV uploads —
            # confirm which formats users actually upload.
            audio_file_obj = io.BytesIO(audio_input)
            audio_file_obj.name = "uploaded_audio.wav"
        transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
        return transcription["text"]
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
# Resets every wired input/output widget and the sliders to their defaults.
def clear_chat():
    """Return the 16 default values for the components wired to the clear button."""
    text_defaults = [""] * 7
    upload_defaults = [None, "", None, "", None, ""]
    slider_defaults = [1.0, 1.0, 2048]
    return tuple(text_defaults + upload_defaults + slider_defaults)
# Gradio UI Layout

def _encode_image_as_data_url(image_path):
    """Return the image at *image_path* inlined as a base64 data URL.

    The chat API cannot fetch local file paths, so uploaded images must be
    embedded directly in the request body.
    """
    with open(image_path, "rb") as img_file:
        encoded = base64.b64encode(img_file.read()).decode("utf-8")
    # NOTE(review): subtype is a guess; the API tolerates common raster
    # formats regardless — confirm if uploads other than JPEG/PNG are expected.
    return f"data:image/jpeg;base64,{encoded}"


def _extract_pdf_text(pdf_path):
    """Extract the plain text of every page of the PDF at *pdf_path* via PyMuPDF."""
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)


with gr.Blocks() as demo:
    gr.Markdown("## π₯ GPT-4.5 AI Chatbot: Text, Image, PDF, & Voice Support")

    # Custom CSS for buttons
    gr.HTML("""
    <style>
        #api_key_button {
            margin-top: 27px;
            background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
            color: white;
            font-weight: bold;
            border-radius: 5px;
        }
        #api_key_button:hover {
            background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%);
        }
        #clear_chat_button {
            background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%);
            color: white;
            font-weight: bold;
            border-radius: 5px;
        }
        #clear_chat_button:hover {
            background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%);
        }
    </style>
    """)

    # API Key Input
    with gr.Row():
        api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
        api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
        api_key_output = gr.Textbox(label="API Key Status", interactive=False)

    # Accordion for Hyperparameters
    with gr.Accordion("π§ Advanced Settings (Hyperparameters)", open=False):
        gr.Markdown("""
        - **Temperature**: Controls randomness. Lower values make responses more predictable.
        - **Top-P (Nucleus Sampling)**: Determines how many top probable words can be chosen.
        - **Max Output Tokens**: Limits the length of the response.
        """)
        with gr.Row():
            temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
            top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
            max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")

    with gr.Tabs():
        with gr.Tab("π¬ Text Chat"):
            text_query = gr.Textbox(label="Enter your query")
            text_output = gr.Textbox(label="Response", interactive=False)
            text_button = gr.Button("Ask")

        with gr.Tab("πΌοΈ Image URL Chat"):
            image_url = gr.Textbox(label="Enter Image URL")
            image_query = gr.Textbox(label="Ask about the Image")
            image_url_output = gr.Textbox(label="Response", interactive=False)
            image_url_button = gr.Button("Ask")

        with gr.Tab("πΈ Image Upload Chat"):
            image_upload = gr.File(label="Upload an Image", type="filepath")
            image_text_query = gr.Textbox(label="Ask about the uploaded image")
            image_output = gr.Textbox(label="Response", interactive=False)
            image_button = gr.Button("Ask")

        with gr.Tab("π PDF Chat"):
            pdf_upload = gr.File(label="Upload a PDF", type="filepath")
            pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
            pdf_output = gr.Textbox(label="Response", interactive=False)
            pdf_button = gr.Button("Ask")

        with gr.Tab("π€ Voice Chat"):
            audio_record = gr.Audio(source="microphone", type="numpy", label="ποΈ Record Audio")
            audio_upload = gr.File(label="π Upload an Audio File", type="binary")
            audio_query = gr.Textbox(label="Ask a question about the transcription")
            audio_output = gr.Textbox(label="Response", interactive=False)
            audio_button = gr.Button("Ask")

    # Clear chat button
    clear_button = gr.Button("π§Ή Clear Chat", elem_id="clear_chat_button")

    # Button Click Actions
    api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])

    text_button.click(
        lambda q, t, p, m: query_openai(
            [{"role": "user", "content": [{"type": "text", "text": q}]}], t, p, m),
        inputs=[text_query, temperature, top_p, max_output_tokens],
        outputs=[text_output])

    image_url_button.click(
        lambda u, q, t, p, m: query_openai(
            [{"role": "user", "content": [
                {"type": "image_url", "image_url": {"url": u}},
                {"type": "text", "text": q}]}], t, p, m),
        inputs=[image_url, image_query, temperature, top_p, max_output_tokens],
        outputs=[image_url_output])

    # FIX: a bare local filepath is useless as an image_url — inline the
    # uploaded file as a base64 data URL instead.
    image_button.click(
        lambda f, q, t, p, m: query_openai(
            [{"role": "user", "content": [
                {"type": "image_url", "image_url": {"url": _encode_image_as_data_url(f)}},
                {"type": "text", "text": q}]}], t, p, m),
        inputs=[image_upload, image_text_query, temperature, top_p, max_output_tokens],
        outputs=[image_output])

    # FIX: pdf_upload is type="filepath" (a str), so the old `f.read()` raised
    # AttributeError; extract the document text with PyMuPDF (imported as fitz
    # at the top of the file but previously unused).
    pdf_button.click(
        lambda f, q, t, p, m: query_openai(
            [{"role": "user", "content": [
                {"type": "text", "text": _extract_pdf_text(f)},
                {"type": "text", "text": q}]}], t, p, m),
        inputs=[pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens],
        outputs=[pdf_output])

    # FIX: the upload widget existed but was ignored — fall back to the
    # uploaded file when nothing was recorded.
    audio_button.click(
        lambda rec, up, q, t, p, m: query_openai(
            [{"role": "user", "content": [
                {"type": "text", "text": transcribe_audio(rec if rec is not None else up)},
                {"type": "text", "text": q}]}], t, p, m),
        inputs=[audio_record, audio_upload, audio_query, temperature, top_p, max_output_tokens],
        outputs=[audio_output])

    # FIX: clear_button was defined but never wired to clear_chat. The output
    # order mirrors clear_chat()'s 16-value return; audio_upload/audio_output
    # are not covered by that contract and keep their state.
    clear_button.click(
        clear_chat,
        outputs=[text_query, text_output, image_url, image_query, image_url_output,
                 image_text_query, image_output, image_upload, pdf_text_query,
                 pdf_upload, pdf_output, audio_record, audio_query,
                 temperature, top_p, max_output_tokens])

# Launch Gradio App
if __name__ == "__main__":
    demo.launch()