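"""Streamlit helpers for a car-damage image annotation tool.

Loads an annotation CSV from S3, serves random not-yet-validated images from
the same bucket for labelling, and writes the updated annotations back to S3.
"""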
import boto3
from PIL import Image
import pandas as pd
import streamlit as st
import random
import io

# S3 client configured from Streamlit secrets; the images and the annotation
# CSV both live in the same bucket.
s3_client = boto3.client('s3',
    aws_access_key_id=st.secrets["aws_access_key_id"],
    aws_secret_access_key=st.secrets["aws_secret_access_key"],
    region_name='eu-west-3')
bucket_name = "sygma-global-data-storage"
folder = "car-damage-detection/scrappedImages/"

# Load the annotation table from S3 and keep only rows whose image actually
# exists in the bucket.
csv_folder = "car-damage-detection/CSVs/"
s3_df_path = csv_folder + "70k_old_annotations_fixed.csv"
response = s3_client.get_object(Bucket=bucket_name, Key=s3_df_path)
with io.BytesIO(response['Body'].read()) as bio:
    df = pd.read_csv(bio, low_memory=False)
df = df[df['s3_available'] == True]

def get_car_parts_count():
    # Annotation columns start at index 6; everything after that is a car part.
    # Currently returns only the part column names (the counting logic below
    # is disabled).
    car_parts = df.columns[6:]
    return car_parts
    # # create a dictionary with the count of each part, +1 for a part if value > 0
    # car_parts_count = {part: len(df[df[part] > 0]) for part in car_parts}
    # return [f"{part} ({count})" for part, count in car_parts_count.items()]

def get_random_image(parts_filter=False):
    # Pick a random image that has not been validated yet, optionally
    # restricted to images where all selected parts are damaged.
    not_validated_imgs = df[df["validated"] == False]["img_name"].tolist()
    if parts_filter:
        # # remove the count from the part name
        # parts_filter = [part.split(" (")[0] for part in parts_filter]
        # get rows where all selected parts are damaged (> 0)
        filtered_imgs = df[(df[parts_filter] > 0).all(axis=1)]["img_name"].tolist()
        not_validated_imgs = list(set(not_validated_imgs) & set(filtered_imgs))
    if len(not_validated_imgs) == 0:
        return None, None
    image_name = random.choice(not_validated_imgs)
    s3_image_path = folder + image_name
    try:
        response = s3_client.get_object(Bucket=bucket_name, Key=s3_image_path)
        image = Image.open(io.BytesIO(response['Body'].read())).resize((1000, 800))
        return image, image_name
    except Exception:
        # Image missing from S3 or unreadable: retry with another random
        # image, keeping the same parts filter.
        return get_random_image(parts_filter)

def get_img_damages(img_name):
    # Return the per-part damage values for one image as a dict.
    img_row = df.loc[df["img_name"] == img_name]
    damages = img_row.iloc[0, 6:].to_dict()
    return damages

def process_image(img_name, annotator_name, is_car, skip, rotation, damaged_parts):
    # Save the annotation for this image and push the updated CSV back to S3.
    mask = df["img_name"] == img_name
    df.loc[mask, "annotator_name"] = annotator_name
    df.loc[mask, "is_car"] = is_car
    df.loc[mask, "rotation"] = rotation
    if not skip:
        df.loc[mask, list(damaged_parts.keys())] = list(damaged_parts.values())
    # Skipped images stay unvalidated so they can be served again later.
    df.loc[mask, "validated"] = not skip
    # df.to_csv("CSVs/70k_old_annotations_fixed.csv", index=False)
    s3_client.put_object(Bucket=bucket_name, Key=s3_df_path, Body=df.to_csv(index=False))
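
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original script): one way a
# Streamlit page could wire these helpers together. The widget choices and
# labels below are assumptions for demonstration only.
#
# parts = st.multiselect("Filter by damaged parts", get_car_parts_count())
# image, image_name = get_random_image(parts or False)
# if image is None:
#     st.info("No unvalidated images match the current filter.")
# else:
#     st.image(image, caption=image_name)
#     st.json(get_img_damages(image_name))
#     annotator = st.text_input("Annotator name")
#     if st.button("Skip"):
#         process_image(image_name, annotator, is_car=True, skip=True,
#                       rotation=0, damaged_parts={})
# ---------------------------------------------------------------------------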