# Multimodal AI Assistant — Streamlit demo app (text, voice, and image interaction).
# (Originally hosted as a Hugging Face Space.)
import os
import tempfile

import streamlit as st
import whisper
from gtts import gTTS
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor, pipeline
# --- Model initialization ---
# Streamlit re-executes this script top-to-bottom on every user interaction.
# The original code reloaded all three models on each rerun; wrapping the
# loaders in st.cache_resource instantiates each model once per server
# process. Module-level names are kept unchanged for the rest of the script.

@st.cache_resource
def _load_chat_pipeline():
    """Load the text-to-text chat model (BlenderBot 400M, distilled)."""
    return pipeline("text2text-generation", model="facebook/blenderbot-400M-distill")

@st.cache_resource
def _load_image_models():
    """Load the BLIP processor and captioning model as a pair."""
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return processor, model

@st.cache_resource
def _load_voice_model():
    """Load the Whisper speech-to-text model ("base" checkpoint)."""
    return whisper.load_model("base")

# 1. Text Model
chat_pipeline = _load_chat_pipeline()
# 2. Image Model
image_processor, image_model = _load_image_models()
# 3. Voice Model
voice_model = _load_voice_model()
# --- Streamlit UI ---
st.title("Multimodal AI Assistant")
st.write("Interact with AI via text, voice, and images!")

# Text Input Section
st.header("Text Interaction")
user_text = st.text_input("Enter your query:")
if st.button("Submit Text"):
    if user_text:
        # pipeline() returns a list of dicts; the reply is under 'generated_text'.
        response = chat_pipeline(user_text)
        st.success(f"Assistant: {response[0]['generated_text']}")
    else:
        # Fix: the original silently did nothing when the box was empty.
        st.warning("Please enter a query before submitting.")
# Voice Input Section
st.header("Voice Interaction")
uploaded_audio = st.file_uploader("Upload an audio file:", type=["wav", "mp3"])
if st.button("Submit Audio"):
    if uploaded_audio:
        # Whisper needs a real filesystem path, so persist the upload to a
        # temporary file. delete=False because the path must outlive the
        # `with` block; we remove it ourselves below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            temp_audio_file.write(uploaded_audio.read())
            temp_audio_path = temp_audio_file.name
        try:
            # Transcribe audio to text
            transcribed_text = voice_model.transcribe(temp_audio_path)['text']
        finally:
            # Fix: the original leaked one temp file per request.
            os.unlink(temp_audio_path)
        st.write(f"Transcribed Text: {transcribed_text}")
        # Generate AI response from the transcript.
        audio_response = chat_pipeline(transcribed_text)
        reply_text = audio_response[0]['generated_text']
        st.success(f"Assistant: {reply_text}")
        # Convert response to speech and play it back.
        tts = gTTS(reply_text)
        tts_output_path = "response_audio.mp3"
        tts.save(tts_output_path)
        st.audio(tts_output_path)
    else:
        # Fix: the original silently did nothing without an upload.
        st.warning("Please upload an audio file before submitting.")
# Image Input Section
st.header("Image Interaction")
uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
if st.button("Submit Image"):
    if uploaded_image:
        # Fix: force 3-channel RGB — palette or RGBA PNGs would otherwise be
        # passed to the BLIP processor with the wrong channel count.
        image = Image.open(uploaded_image).convert("RGB")
        # Display uploaded image
        st.image(image, caption="Uploaded Image")
        # Generate caption: preprocess -> autoregressive generate -> decode.
        inputs = image_processor(image, return_tensors="pt")
        outputs = image_model.generate(**inputs)
        caption = image_processor.decode(outputs[0], skip_special_tokens=True)
        st.success(f"Generated Caption: {caption}")
    else:
        # Fix: the original silently did nothing without an upload.
        st.warning("Please upload an image before submitting.")