# app.py — Multimodal AI Assistant (Streamlit)
# (Hugging Face page residue removed: "ANASAKHTAR's picture / Update app.py / ab1bf33 verified")
import os
import tempfile

import streamlit as st
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import whisper
from gtts import gTTS
# ---------------------------------------------------------------------------
# Model initialization
#
# Loading is wrapped in a cached factory so Streamlit does not reload the
# (large) models on every script rerun — @st.cache_resource keeps one shared
# instance per process for the lifetime of the app.
# ---------------------------------------------------------------------------
@st.cache_resource
def _load_models():
    """Load and return (chat_pipeline, image_processor, image_model, voice_model)."""
    # 1. Text model: distilled BlenderBot for conversational replies.
    chat = pipeline("text2text-generation", model="facebook/blenderbot-400M-distill")
    # 2. Image model: BLIP base checkpoint for image captioning.
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    captioner = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    # 3. Voice model: Whisper "base" for speech-to-text.
    speech = whisper.load_model("base")
    return chat, processor, captioner, speech


chat_pipeline, image_processor, image_model, voice_model = _load_models()
# Streamlit App
st.title("Multimodal AI Assistant")
st.write("Interact with AI via text, voice, and images!")

# Text Input Section
st.header("Text Interaction")
user_text = st.text_input("Enter your query:")
if st.button("Submit Text"):
    if user_text:
        # Generate a conversational reply with the text2text pipeline.
        response = chat_pipeline(user_text)
        st.success(f"Assistant: {response[0]['generated_text']}")
    else:
        # Guard against an empty submission instead of silently doing nothing.
        st.warning("Please enter a query before submitting.")
# Voice Input Section
st.header("Voice Interaction")
uploaded_audio = st.file_uploader("Upload an audio file:", type=["wav", "mp3"])
if st.button("Submit Audio"):
    if uploaded_audio:
        # Persist the upload to disk because whisper's transcribe() expects a
        # file path. Keep the upload's own extension so the decoder sees the
        # right container (the old code forced ".wav" even for mp3 uploads).
        suffix = os.path.splitext(uploaded_audio.name)[1] or ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
            temp_audio_file.write(uploaded_audio.read())
            temp_audio_path = temp_audio_file.name
        try:
            # Transcribe audio to text.
            transcribed_text = voice_model.transcribe(temp_audio_path)['text']
        finally:
            # delete=False means nobody else will clean this up — remove it
            # even if transcription raises.
            os.remove(temp_audio_path)
        st.write(f"Transcribed Text: {transcribed_text}")
        # Generate AI response from the transcript.
        audio_response = chat_pipeline(transcribed_text)
        st.success(f"Assistant: {audio_response[0]['generated_text']}")
        # Convert the reply to speech and play it back.
        tts = gTTS(audio_response[0]['generated_text'])
        tts_output_path = "response_audio.mp3"
        tts.save(tts_output_path)
        st.audio(tts_output_path)
    else:
        st.warning("Please upload an audio file before submitting.")
# Image Input Section
st.header("Image Interaction")
uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
if st.button("Submit Image"):
    if uploaded_image:
        # Display the uploaded image. Convert to RGB because BLIP expects
        # 3-channel input and PNG uploads may carry an alpha channel (RGBA).
        image = Image.open(uploaded_image).convert("RGB")
        st.image(image, caption="Uploaded Image")
        # Generate a caption with BLIP.
        inputs = image_processor(image, return_tensors="pt")
        outputs = image_model.generate(**inputs)
        caption = image_processor.decode(outputs[0], skip_special_tokens=True)
        st.success(f"Generated Caption: {caption}")
    else:
        st.warning("Please upload an image before submitting.")