"""
# Suggestion panel content: four emoji/caption pairs, one item per line. It is
# rendered with gr.HTML(custom_suggestions) right after the custom header in
# the Gradio interface below.
# NOTE(review): although this value is passed to gr.HTML, no markup tags are
# visible in the literal as seen here -- confirm whether tags are intended.
custom_suggestions = """
💬
Chat with the Text Model
🖼️
Analyze Images with Vision Model
🔊
Generate Speech with Parler-TTS
🔍
Explore advanced options
"""
# Gradio interface
#
# Builds a three-tab UI (text chat, image analysis, text-to-speech) and wires
# each tab's button to its handler. The handlers (stream_text_chat,
# process_vision_query, generate_speech) as well as custom_css and
# custom_header are defined elsewhere in this file.
#
# NOTE(review): the container nesting (Tab / Accordion / Row / Column) is
# reconstructed from the order of the statements; confirm it matches the
# intended layout.
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Base().set(
        body_background_fill="#0b0f19",
        body_text_color="#e2e8f0",
        button_primary_background_fill="#3b82f6",
        button_primary_background_fill_hover="#2563eb",
        button_primary_text_color="white",
        block_title_text_color="#94a3b8",
        block_label_text_color="#94a3b8",
    ),
) as demo:
    # Page header followed by the suggestion panel.
    gr.HTML(custom_header)
    gr.HTML(custom_suggestions)

    # --- Tab 1: text chat ---------------------------------------------------
    with gr.Tab("Text Model (Phi-3.5-mini)"):
        chatbot = gr.Chatbot(height=400)
        msg = gr.Textbox(label="Message", placeholder="Type your message here...")

        # Generation settings, collapsed by default (open=False).
        with gr.Accordion("Advanced Options", open=False):
            system_prompt = gr.Textbox(value="You are a helpful assistant", label="System Prompt")
            temperature = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature")
            max_new_tokens = gr.Slider(minimum=128, maximum=8192, step=1, value=1024, label="Max new tokens")
            top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="top_p")
            top_k = gr.Slider(minimum=1, maximum=20, step=1, value=20, label="top_k")

        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear Chat", variant="secondary")

        audio_output = gr.Audio(label="AI Response Audio")

        # The handler receives the message, the chatbot value and every
        # advanced option; its results update the chatbot and the audio player.
        submit_btn.click(
            fn=stream_text_chat,
            inputs=[msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k],
            outputs=[chatbot, audio_output],
        )
        # Reset both outputs to None. queue=False is kept from the original so
        # the reset does not go through Gradio's event queue.
        clear_btn.click(
            fn=lambda: (None, None),
            inputs=None,
            outputs=[chatbot, audio_output],
            queue=False,
        )

    # --- Tab 2: image analysis ----------------------------------------------
    with gr.Tab("Vision Model (Phi-3.5-vision)"):
        with gr.Row():
            # Left column: image upload, question box and trigger button.
            with gr.Column(scale=1):
                vision_input_img = gr.Image(label="Upload an Image", type="pil")
                vision_text_input = gr.Textbox(label="Ask a question about the image", placeholder="What do you see in this image?")
                vision_submit_btn = gr.Button("Analyze Image", variant="primary")
            # Right column: the handler's text result.
            with gr.Column(scale=1):
                vision_output_text = gr.Textbox(label="AI Analysis", lines=10)

        vision_submit_btn.click(
            fn=process_vision_query,
            inputs=[vision_input_img, vision_text_input],
            outputs=[vision_output_text],
        )

    # --- Tab 3: text-to-speech ----------------------------------------------
    with gr.Tab("Text-to-Speech (Parler-TTS)"):
        with gr.Row():
            # Left column: text to speak, voice description and trigger button.
            with gr.Column(scale=1):
                tts_prompt = gr.Textbox(label="Text to Speak", placeholder="Enter the text you want to convert to speech...")
                tts_description = gr.Textbox(label="Voice Description", value="A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up.", lines=3)
                tts_submit_btn = gr.Button("Generate Speech", variant="primary")
            # Right column: the handler's audio result.
            with gr.Column(scale=1):
                tts_output_audio = gr.Audio(label="Generated Speech")

        tts_submit_btn.click(
            fn=generate_speech,
            inputs=[tts_prompt, tts_description],
            outputs=[tts_output_audio],
        )

    # Trailing HTML element; its content is an empty string in this file.
    gr.HTML("")
# Launch the Gradio app only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()