import os
import requests
import openai
import streamlit as st
from PIL import Image
from dotenv import load_dotenv
import torch
from transformers import AutoProcessor, AutoModelForImageClassification
# Load environment variables from .env file
load_dotenv()
# Set up the Hugging Face API for emotion detection
emotion_model_url = "https://api-inference.huggingface.co/models/trpakov/vit-face-expression"
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
# Set up OpenAI API key
openai.api_key = os.getenv('OPENAI_API_KEY')
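# The .env file loaded above is expected to define the two keys referenced in this
# module; for example (placeholder values):
#
#     HUGGINGFACE_API_KEY=hf_...
#     OPENAI_API_KEY=sk-...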
# Function to query the facial expression recognition model
def query_emotion(image):
    # Load the processor and model
    processor = AutoProcessor.from_pretrained("trpakov/vit-face-expression")
    model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression")
    # Preprocess the image
    inputs = processor(images=image, return_tensors="pt")
    # Perform inference
    with torch.no_grad():
        outputs = model(**inputs)
    # Get the predicted class index
    logits = outputs.logits
    predicted_class_idx = torch.argmax(logits, dim=-1).item()
    # Map the index to its label via the model config (the image processor has no decode method)
    predicted_label = model.config.id2label[predicted_class_idx]
    return predicted_label
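# Alternative sketch (not called by the app): query the hosted Inference API using the
# `emotion_model_url` and `headers` defined above instead of loading the model locally.
# The response shape is an assumption here -- hosted image-classification endpoints
# typically return a list of {"label", "score"} dicts for the posted image bytes.
def query_emotion_via_api(image_bytes):
    response = requests.post(emotion_model_url, headers=headers, data=image_bytes)
    response.raise_for_status()
    predictions = response.json()
    # Pick the highest-scoring label (assumed response format)
    return max(predictions, key=lambda p: p["score"])["label"]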
# Function to generate a response using OpenAI based on detected emotion
def generate_text_based_on_mood(emotion):
    try:
        # Create a dynamic prompt based on the detected emotion
        prompt = f"Generate a light-hearted joke or motivational message for someone who is feeling {emotion}."
        # Call OpenAI's API using GPT-4
        response = openai.ChatCompletion.create(
            model="gpt-4",  # Specify the GPT-4 model
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        # Extract the generated text
        generated_text = response['choices'][0]['message']['content']
        return generated_text.strip()
    except Exception as e:
        st.error(f"Error generating text: {e}")
        return "Sorry, I couldn't come up with a message at this moment."
# Function to convert text to speech using gTTS
def text_to_speech(text):
    from gtts import gTTS
    try:
        tts = gTTS(text, lang='en')
        audio_file = "output.mp3"
        tts.save(audio_file)  # Save the audio file
        return audio_file
    except Exception as e:
        st.error(f"Error with TTS: {e}")
        return None
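# Alternative sketch (not called by the app, assumes keeping the audio in memory is
# acceptable): gTTS can write into a file-like object via write_to_fp(), and
# st.audio() accepts raw bytes, which avoids leaving output.mp3 on disk between runs.
def text_to_speech_in_memory(text):
    from io import BytesIO
    from gtts import gTTS
    try:
        buffer = BytesIO()
        gTTS(text, lang='en').write_to_fp(buffer)
        return buffer.getvalue()  # bytes, playable with st.audio(...)
    except Exception as e:
        st.error(f"Error with TTS: {e}")
        return None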
# Streamlit UI
st.title("Facial Expression Mood Detector")
st.write("Upload an image of a face to detect mood and receive uplifting messages or jokes.")
# Upload image
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    # Load and display the image
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image', use_column_width=True)
    # Detect facial expression
    emotion = query_emotion(image)
    st.write(f"Detected emotion: {emotion}")
    # Generate text based on detected emotion
    message = generate_text_based_on_mood(emotion)
    st.write("Here's something to cheer you up:")
    st.write(message)
    # Convert the generated message to audio
    audio_file = text_to_speech(message)
    # Provide an audio player in the Streamlit app if the audio file exists
    if audio_file:
        st.audio(audio_file)  # Streamlit handles playback
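# To run locally (assuming Streamlit is installed and this file is saved as app.py,
# the conventional entry point for a Space):
#     streamlit run app.py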