added photo upload
app.py CHANGED
@@ -16,12 +16,10 @@ from mtcnn import MTCNN
 from PIL import Image, ImageDraw
 from transformers import pipeline
 
-
 # Initialize the Hugging Face pipeline for facial emotion detection
 emotion_pipeline = pipeline("image-classification", model="trpakov/vit-face-expression")
 
-img_container = {"webcam": None,
-                 "analyzed": None}
+img_container = {"webcam": None, "analyzed": None, "uploaded": None}
 
 # Initialize MTCNN for face detection
 mtcnn = MTCNN()
@@ -37,21 +35,13 @@ class Detection(NamedTuple):
     score: float
     box: np.ndarray
 
-# NOTE: The callback will be called in another thread,
-#       so use a queue here for thread-safety to pass the data
-#       from inside to outside the callback.
-# TODO: A general-purpose shared state object may be more useful.
 result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
 
 # Function to analyze sentiment
 def analyze_sentiment(face):
-    # Convert face to RGB
     rgb_face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
-    # Convert the face to a PIL image
     pil_image = Image.fromarray(rgb_face)
-    # Analyze sentiment using the Hugging Face pipeline
     results = emotion_pipeline(pil_image)
-    # Get the dominant emotion
     dominant_emotion = max(results, key=lambda x: x['score'])['label']
     return dominant_emotion
 
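Note on analyze_sentiment: the Hugging Face "image-classification" pipeline returns a list of {'label': ..., 'score': ...} dicts for the face crop, so the dominant emotion is simply the highest-scoring entry. A minimal illustration of that step (labels and scores below are made up, not real model output):

    results = [{'label': 'happy', 'score': 0.91}, {'label': 'neutral', 'score': 0.06}]
    dominant_emotion = max(results, key=lambda x: x['score'])['label']  # -> 'happy'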
@@ -60,28 +50,19 @@ LINE_SIZE = 2
 
 # Function to detect faces, analyze sentiment, and draw a red box around them
 def detect_and_draw_faces(frame):
-    # Detect faces using MTCNN
     results = mtcnn.detect_faces(frame)
-
-    # Draw on the frame
     for result in results:
         x, y, w, h = result['box']
         face = frame[y:y+h, x:x+w]
         sentiment = analyze_sentiment(face)
-        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), LINE_SIZE)
-
-        # Calculate position for the text background and the text itself
+        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), LINE_SIZE)
         text_size = cv2.getTextSize(sentiment, cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, 2)[0]
         text_x = x
         text_y = y - 10
         background_tl = (text_x, text_y - text_size[1])
         background_br = (text_x + text_size[0], text_y + 5)
-
-        # Draw black rectangle as background
         cv2.rectangle(frame, background_tl, background_br, (0, 0, 0), cv2.FILLED)
-        # Draw white text on top
         cv2.putText(frame, sentiment, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, (255, 255, 255), 2)
-
     result_queue.put(results)
     return frame
 
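Note on detect_and_draw_faces: cv2.getTextSize returns ((text_width, text_height), baseline), and the [0] index keeps just the (width, height) pair used to size the black label background. The raw MTCNN detections are also pushed into result_queue so the Streamlit script thread can read them outside the video callback; a rough consumer sketch (assuming the queue defined above):

    detections = result_queue.get()  # blocks until the callback thread pushes the next frame's results
    for d in detections:
        print(d['box'], d['confidence'])  # mtcnn detections are dicts with 'box', 'confidence', 'keypoints'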
@@ -90,9 +71,7 @@ def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
     img_container["webcam"] = img
     frame_with_boxes = detect_and_draw_faces(img.copy())
     img_container["analyzed"] = frame_with_boxes
-
     return frame
-    # return av.VideoFrame.from_ndarray(frame_with_boxes, format="bgr24")
 
 ice_servers = get_ice_servers()
 
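Note on video_frame_callback: the callback stores the raw and annotated frames in img_container and returns the untouched frame, so the live WebRTC stream stays unmodified and the annotated copy is shown through the placeholders instead. If the stream itself should carry the boxes, the callback could instead return the annotated frame, roughly as the removed comment suggested:

    return av.VideoFrame.from_ndarray(frame_with_boxes, format="bgr24")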
@@ -135,6 +114,8 @@ st.markdown(
 st.title("Computer Vision Test Lab")
 st.subheader("Facial Sentiment Analysis")
 
+show_labels = st.checkbox("Show the detected labels", value=True)
+
 # Columns for input and output streams
 col1, col2 = st.columns(2)
 
@@ -150,31 +131,50 @@ with col1:
         async_processing=True,
     )
 
+    st.subheader("Upload an Image")
+    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+
 with col2:
     st.header("Analysis")
-    st.
+    input_subheader_placeholder = st.empty()
     input_placeholder = st.empty()
-
+
+    output_subheader_placeholder = st.empty()
     output_placeholder = st.empty()
 
 if webrtc_ctx.state.playing:
-
-
-
-
-
-
-
-    while True:
-        result = result_queue.get()
+    labels_placeholder = st.empty()
+    input_subheader_placeholder.subheader("Input Frame")
+    output_subheader_placeholder.subheader("Output Frame")
+
+    while True:
+        result = result_queue.get()
+        if show_labels:
             labels_placeholder.table(result)
 
-
-
+        img = img_container["webcam"]
+        frame_with_boxes = img_container["analyzed"]
+
+        if img is None:
+            continue
 
-
-
+        input_placeholder.image(img, channels="BGR")
+        output_placeholder.image(frame_with_boxes, channels="BGR")
 
-
-
+if uploaded_file is not None:
+    input_subheader_placeholder.subheader("Input Frame")
+    output_subheader_placeholder.subheader("Output Frame")
+
+    image = Image.open(uploaded_file)
+    img = np.array(image.convert("RGB"))  # Ensure image is in RGB format
+    img_container["uploaded"] = img
+    analyzed_img = detect_and_draw_faces(img.copy())
+    input_placeholder.image(img)
+    output_placeholder.image(analyzed_img)
+
+    result = result_queue.get()
+    if show_labels:
+        labels_placeholder = st.empty()
+        labels_placeholder.table(result)
 
+
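Note on the new upload path: st.file_uploader returns a file-like UploadedFile (or None), which PIL's Image.open can read directly; converting the PIL image to a NumPy array gives detect_and_draw_faces the H x W x 3 uint8 frame it expects. A minimal standalone sketch of that conversion (the file name is just an example):

    from PIL import Image
    import numpy as np

    image = Image.open("portrait.jpg")      # any path or file-like object
    img = np.array(image.convert("RGB"))    # H x W x 3 uint8 array
    analyzed = detect_and_draw_faces(img.copy())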