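# Spaces: Running
# (Status text from the hosting page, presumably captured along with the source; not part of the program.)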
import contextlib
import os
import tempfile

import openai
import requests
import streamlit as st
import torch
from dotenv import load_dotenv
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageClassification
# Read settings from a local .env file into the process environment.
load_dotenv()

# Give the OpenAI SDK its key; the lookup yields None when the variable is unset.
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Facial-expression classifier: the processor and the model are both loaded
# from the same checkpoint identifier.
_EXPRESSION_CHECKPOINT = "trpakov/vit-face-expression"
processor = AutoProcessor.from_pretrained(_EXPRESSION_CHECKPOINT)
model = AutoModelForImageClassification.from_pretrained(_EXPRESSION_CHECKPOINT)
def query_emotion(image):
    """Classify the facial expression shown in *image*.

    Args:
        image: The image to classify, typically a PIL image as returned by
            ``Image.open``. Objects without a ``mode`` attribute are passed
            to the processor unchanged.

    Returns:
        The label from ``model.config.id2label`` for the class with the
        highest logit.
    """
    # Uploaded files (PNG in particular) can be RGBA, palette or grayscale.
    # The checkpoint's processor presumably normalises three colour channels,
    # so bring PIL images to RGB before preprocessing.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Index of the highest logit, mapped to its label through the model config.
    predicted_class_idx = torch.argmax(logits, dim=-1).item()
    return model.config.id2label[predicted_class_idx]
def generate_text_based_on_mood(emotion):
    """Ask GPT-4 for a light-hearted joke or motivational message for *emotion*.

    Works with both generations of the ``openai`` package: the client-object
    API is used when ``openai.OpenAI`` exists (``openai>=1.0``, where the
    module-level ``ChatCompletion`` interface was removed), and the legacy
    ``openai.ChatCompletion.create`` call is used otherwise.

    Args:
        emotion: Emotion label interpolated into the prompt.

    Returns:
        The generated text with surrounding whitespace stripped. If anything
        fails, the error is shown with ``st.error`` and a fixed apology
        string is returned instead.
    """
    try:
        prompt = f"Generate a light-hearted joke or motivational message for someone who is feeling {emotion}."
        messages = [{"role": "user", "content": prompt}]

        if hasattr(openai, "OpenAI"):
            # openai>=1.0: requests go through a client object. Reuse the key
            # already configured on the module.
            client = openai.OpenAI(api_key=openai.api_key)
            response = client.chat.completions.create(model="gpt-4", messages=messages)
            generated_text = response.choices[0].message.content
        else:
            # Legacy (pre-1.0) SDK: module-level call, dict-style response.
            response = openai.ChatCompletion.create(model="gpt-4", messages=messages)
            generated_text = response['choices'][0]['message']['content']

        return generated_text.strip()
    except Exception as e:
        # UI boundary: report the problem on the page and fall back to a
        # canned message rather than aborting the script.
        st.error(f"Error generating text: {e}")
        return "Sorry, I couldn't come up with a message at this moment."
def text_to_speech(text):
    """Convert *text* to spoken English with gTTS and save it as an MP3 file.

    Each call writes to its own temporary file, so simultaneous callers
    cannot overwrite one another's audio. The file is not removed on
    success; deleting it once it is no longer needed is left to the caller.

    Args:
        text: The text to speak.

    Returns:
        Path of the saved MP3 file, or ``None`` if anything failed (the
        error is shown with ``st.error``).
    """
    audio_file = None
    try:
        # Imported inside the try so a missing gtts package is reported the
        # same way as any other TTS failure.
        from gtts import gTTS

        tts = gTTS(text, lang='en')

        # Reserve a unique path, then close our handle and let gTTS write to
        # the path itself.
        fd, audio_file = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(audio_file)
        return audio_file
    except Exception as e:
        # Do not leave an empty or partial file behind.
        if audio_file is not None:
            with contextlib.suppress(OSError):
                os.remove(audio_file)
        st.error(f"Error with TTS: {e}")
        return None
# ---- Streamlit page ----
st.title("Facial Expression Mood Detector")
st.write("Upload an image of a face to detect mood and receive uplifting messages or jokes.")

# File picker limited to the listed image extensions.
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

# Nothing below runs until a file has been provided.
if uploaded_file is not None:
    # Open the upload and show it back to the user.
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Classify the expression and report the label.
    emotion = query_emotion(image)
    st.write(f"Detected emotion: {emotion}")

    # Generate a message for that emotion and print it under a short heading.
    message = generate_text_based_on_mood(emotion)
    st.write("Here's something to cheer you up:")
    st.write(message)

    # Turn the message into audio; show a player only when a file came back.
    audio_file = text_to_speech(message)
    if audio_file:
        st.audio(audio_file)