Create app.py
app.py
ADDED
@@ -0,0 +1,68 @@
import streamlit as st
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import whisper
from gtts import gTTS
import tempfile

# Initialize Models
# 1. Text Model
# "text2text-generation" suits the seq2seq BlenderBot checkpoint and returns
# [{"generated_text": ...}], which matches the indexing used below; the older
# "conversational" task expects Conversation objects and is deprecated.
chat_pipeline = pipeline("text2text-generation", model="facebook/blenderbot-400M-distill")

# 2. Image Model
image_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
image_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# 3. Voice Model
voice_model = whisper.load_model("base")

# Streamlit App
st.title("Multimodal AI Assistant")
st.write("Interact with AI via text, voice, and images!")

# Text Input Section
st.header("Text Interaction")
user_text = st.text_input("Enter your query:")
if st.button("Submit Text"):
    if user_text:
        response = chat_pipeline(user_text)
        st.success(f"Assistant: {response[0]['generated_text']}")

# Voice Input Section
st.header("Voice Interaction")
uploaded_audio = st.file_uploader("Upload an audio file:", type=["wav", "mp3"])
if st.button("Submit Audio"):
    if uploaded_audio:
        # Save the uploaded audio to a temporary file, keeping its original
        # extension so Whisper/ffmpeg can decode wav and mp3 uploads alike
        suffix = "." + uploaded_audio.name.split(".")[-1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
            temp_audio_file.write(uploaded_audio.read())
            temp_audio_path = temp_audio_file.name

        # Transcribe audio to text
        transcribed_text = voice_model.transcribe(temp_audio_path)['text']
        st.write(f"Transcribed Text: {transcribed_text}")

        # Generate AI response
        audio_response = chat_pipeline(transcribed_text)
        st.success(f"Assistant: {audio_response[0]['generated_text']}")

        # Convert response to speech (gTTS calls Google's online TTS service)
        tts = gTTS(audio_response[0]['generated_text'])
        tts_output_path = "response_audio.mp3"
        tts.save(tts_output_path)
        st.audio(tts_output_path)

# Image Input Section
st.header("Image Interaction")
uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
if st.button("Submit Image"):
    if uploaded_image:
        # Display uploaded image
        image = Image.open(uploaded_image)
        st.image(image, caption="Uploaded Image")

        # Generate caption
        inputs = image_processor(image, return_tensors="pt")
        outputs = image_model.generate(**inputs)
        caption = image_processor.decode(outputs[0], skip_special_tokens=True)
        st.success(f"Generated Caption: {caption}")
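One optional refinement that is not part of the original file: Streamlit re-runs the whole script on every widget interaction, so the three models above are reloaded on each button click. A sketch of loading them once with st.cache_resource, reusing the imports already at the top of app.py and assuming a recent Streamlit release that provides this decorator:

@st.cache_resource
def load_models():
    # Runs once per process; Streamlit caches and reuses the returned objects
    chat = pipeline("text2text-generation", model="facebook/blenderbot-400M-distill")
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    blip = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    asr = whisper.load_model("base")
    return chat, processor, blip, asr

chat_pipeline, image_processor, image_model, voice_model = load_models()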