# app.py — Multimodal AI Assistant (Hugging Face Space by ANASAKHTAR, commit ab1bf33, 2.53 kB)
import os
import tempfile

import streamlit as st
import whisper
from gtts import gTTS
from PIL import Image
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
# Initialize Models
# 1. Text Model
chat_pipeline = pipeline("text2text-generation", model="facebook/blenderbot-400M-distill")
# 2. Image Model
image_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
image_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
# 3. Voice Model
voice_model = whisper.load_model("base")
# Streamlit App
st.title("Multimodal AI Assistant")
st.write("Interact with AI via text, voice, and images!")

# Text Input Section
# Reads a free-form query, runs it through the chat pipeline, and shows the
# first generated reply. (Indentation reconstructed: the guard and the
# response handling must nest under the button click.)
st.header("Text Interaction")
user_text = st.text_input("Enter your query:")
if st.button("Submit Text"):
    if user_text:
        response = chat_pipeline(user_text)
        st.success(f"Assistant: {response[0]['generated_text']}")
    else:
        # Give explicit feedback instead of silently doing nothing.
        st.warning("Please enter a query first.")
# Voice Input Section
# Pipeline: uploaded audio -> temp file -> Whisper transcription -> chat
# pipeline -> gTTS speech playback. (Indentation reconstructed: everything
# below nests under the button click / upload guard.)
st.header("Voice Interaction")
uploaded_audio = st.file_uploader("Upload an audio file:", type=["wav", "mp3"])
if st.button("Submit Audio"):
    if uploaded_audio:
        # Preserve the real extension (the original hard-coded ".wav" even for
        # mp3 uploads); ffmpeg sniffs content, but a truthful suffix is safer.
        suffix = os.path.splitext(uploaded_audio.name)[1] or ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
            temp_audio_file.write(uploaded_audio.read())
            temp_audio_path = temp_audio_file.name
        try:
            # Transcribe audio to text
            transcribed_text = voice_model.transcribe(temp_audio_path)['text']
        finally:
            # The original leaked this file (delete=False, never removed).
            os.remove(temp_audio_path)
        st.write(f"Transcribed Text: {transcribed_text}")
        # Generate AI response
        audio_response = chat_pipeline(transcribed_text)
        st.success(f"Assistant: {audio_response[0]['generated_text']}")
        # Convert response to speech. Write to a temp file rather than a fixed
        # name in the CWD, which collided across concurrent sessions.
        tts = gTTS(audio_response[0]['generated_text'])
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tts_file:
            tts_output_path = tts_file.name
        tts.save(tts_output_path)
        st.audio(tts_output_path)
    else:
        st.warning("Please upload an audio file first.")
# Image Input Section
# Displays the uploaded image and generates a BLIP caption for it.
# (Indentation reconstructed: everything below nests under the button
# click / upload guard.)
st.header("Image Interaction")
uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
if st.button("Submit Image"):
    if uploaded_image:
        # BLIP expects 3-channel RGB input; PNG uploads may be RGBA or
        # palette-mode, which crashed the processor. Convert defensively.
        image = Image.open(uploaded_image).convert("RGB")
        st.image(image, caption="Uploaded Image")
        # Generate caption: preprocess -> generate token ids -> decode to text.
        inputs = image_processor(image, return_tensors="pt")
        outputs = image_model.generate(**inputs)
        caption = image_processor.decode(outputs[0], skip_special_tokens=True)
        st.success(f"Generated Caption: {caption}")
    else:
        st.warning("Please upload an image first.")