# aisatsu-api / main.py
from typing import Optional

from fastapi import FastAPI, File, UploadFile, Form
from sahi.utils.cv import read_image_as_pil
from scipy.spatial import distance as dist
from ultralyticsplus import YOLO

from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
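
# `utils` is a project-local module that is not part of this file. For reference,
# the signatures assumed by the import above (an illustrative sketch, not the
# actual implementation):
#   read_image_file(data: bytes) -> PIL.Image.Image   # decode uploaded bytes
#   pil_to_base64(img) -> str                         # encode a PIL image as base64
#   base64_to_pil(s: str) -> PIL.Image.Image          # inverse of pil_to_base64
#   get_hist(img) -> sequence of floats               # color histogram used below
#   tts(text: str, language: str) -> str              # synthesized greeting audio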

model = YOLO('ultralyticsplus/yolov8s')
CLASS = model.model.names  # class-id -> label mapping (COCO: 0 is "person")
app = FastAPI()
default_bot_voice = "γŠγ―γ„γ‚ˆγ†γ”γ–γ„γΎγ™"  # Japanese greeting: "Good morning"
area_thres = 0.3  # minimum box-to-frame area ratio to treat a person as "close"
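
# For scale (illustrative numbers, not from the original code): on a 640x480
# frame (307,200 px), area_thres = 0.3 requires a person box of at least
# 92,160 px, e.g. about 384x240, before the greeting can fire.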


@app.get("/")
def read_root():
    return {"Message": "Application startup complete"}


@app.post("/aisatsu_api/")
async def predict_api(
    file: UploadFile = File(...),
    last_seen: Optional[str] = Form(None),
):
    image = read_image_file(await file.read())
    # Run detection on the uploaded frame; predict() returns one result per image.
    results = model.predict(image, show=False)[0]
    image = read_image_as_pil(image)
    boxes = results.boxes
    area_image = image.width * image.height
    voice_bot = None
    most_close = 0
    out_img = None
    # 0.5 is also the greeting threshold below, so the check passes when there
    # is no previous frame to compare against.
    diff_value = 0.5
    if boxes is not None:
        for xyxy, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
            # Only class 0 ("person") matters for the greeting.
            if int(cls) != 0:
                continue
            box = xyxy.tolist()
            # Fraction of the frame covered by the box, a proxy for proximity.
            area_rate = (box[2] - box[0]) * (box[3] - box[1]) / area_image
            if area_rate >= most_close:
                # Keep a 128x128 crop of the largest (closest) person so far.
                out_img = image.crop(tuple(box)).resize((128, 128))
                most_close = area_rate
    if last_seen is not None:
        last_seen = base64_to_pil(last_seen)
        if out_img is not None:
            # Euclidean distance between color histograms: low values mean the
            # current person looks like the one greeted in the previous frame.
            diff_value = dist.euclidean(get_hist(out_img), get_hist(last_seen))
    print(most_close, diff_value)
    # Greet only a person who is close enough and not the same one as last time.
    if most_close >= area_thres and diff_value >= 0.5:
        voice_bot = tts(default_bot_voice, language="ja")
    return {
        "voice": voice_bot,
        "image": pil_to_base64(out_img) if out_img is not None else None,
    }
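

# --- Example client call (sketch) ---
# A minimal sketch, assuming the API is served locally on port 8000 and that a
# test image "person.jpg" exists; both the URL and file name are illustrative.
#
#   import requests
#
#   with open("person.jpg", "rb") as f:
#       resp = requests.post(
#           "http://localhost:8000/aisatsu_api/",
#           files={"file": ("person.jpg", f, "image/jpeg")},
#           # Optionally pass the base64 image returned by a previous call:
#           # data={"last_seen": previous_response["image"]},
#       )
#   print(resp.json())  # {"voice": <tts payload or None>, "image": <base64 crop or None>}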