tugaa committed on
Commit
4a13218
·
verified ·
1 Parent(s): b308b2a

Create modules/input_processor.py

Browse files
Files changed (1) hide show
  1. modules/input_processor.py +59 -0
modules/input_processor.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/input_processor.py
2
+ import os
3
+ import asyncio
4
+ import mimetypes
5
+ import langdetect
6
+ from PIL import Image
7
+ from moviepy.editor import VideoFileClip
8
+ from openai import AsyncOpenAI
9
+
10
+ # Module-level async OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
+ client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
11
+
12
class InputProcessor:
    """Gather text, image and video inputs into one context dictionary.

    Each modality that is supplied is recorded in ``context["modality"]``
    and enriched with extra keys (detected language, preview images, and
    LLM-generated summaries).
    """

    def __init__(self):
        pass

    async def process(self, text, image_path, video_path):
        """Build the context dictionary for the supplied inputs.

        Args:
            text: User text; skipped when falsy.
            image_path: Path to an image file; skipped when falsy.
            video_path: Path to a video file; skipped when falsy.

        Returns:
            A dict that always holds ``"modality"`` (list of the modalities
            found, in text/image/video order) and ``"text"``. Depending on
            the inputs it also holds ``"language"``, ``"image_preview"``,
            ``"image_summary"``, ``"video_preview"`` and ``"video_summary"``.
        """
        context = {"modality": [], "text": text}

        if text:
            try:
                lang = langdetect.detect(text)
            except Exception:
                # Detection is best-effort. ``Exception`` (rather than a bare
                # ``except``) lets KeyboardInterrupt and task cancellation
                # propagate instead of being swallowed.
                lang = "unknown"
            context["language"] = lang
            context["modality"].append("text")

        if image_path:
            image = Image.open(image_path)
            context["modality"].append("image")
            context["image_preview"] = image
            context["image_summary"] = await self.describe_image(image_path)

        if video_path:
            keyframe_path = self._extract_keyframe(video_path)
            try:
                preview = Image.open(keyframe_path)
                # Force the pixel data into memory so the preview stays
                # usable after the temporary file is removed.
                preview.load()
            finally:
                os.remove(keyframe_path)
            context["modality"].append("video")
            context["video_preview"] = preview
            context["video_summary"] = await self.describe_video(video_path)

        return context

    async def describe_image(self, image_path):
        """Ask the chat model for a concise description of an image file.

        Args:
            image_path: Path to the image to describe.

        Returns:
            The text content of the first choice in the model response.
        """
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an assistant who explains image contents in concise text."},
                {"role": "user", "content": [{"type": "image_url", "image_url": {"url": self._to_data_url(image_path)}}]},
            ],
        )
        return response.choices[0].message.content

    async def describe_video(self, video_path):
        """Describe a video by describing a single key frame taken from it.

        Simplified approach: only one frame is sent to the model.

        Args:
            video_path: Path to the video file.

        Returns:
            The description returned by ``describe_image`` for the key frame.
        """
        keyframe_path = self._extract_keyframe(video_path)
        try:
            return await self.describe_image(keyframe_path)
        finally:
            os.remove(keyframe_path)

    @staticmethod
    def _to_data_url(image_path):
        """Return the file at ``image_path`` encoded as a base64 ``data:`` URL."""
        import base64

        with open(image_path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("ascii")
        mime_type = mimetypes.guess_type(image_path)[0]
        if not mime_type or not mime_type.startswith("image/"):
            # Unknown or non-image extension: keep the historical default.
            mime_type = "image/jpeg"
        return f"data:{mime_type};base64,{encoded}"

    @staticmethod
    def _extract_keyframe(video_path):
        """Save one frame of the video to a fresh temp JPEG and return its path.

        The caller is responsible for deleting the returned file.
        """
        import tempfile

        clip = VideoFileClip(video_path)
        try:
            duration = clip.duration or 0
            # Use the frame at t=1s when the clip is long enough; otherwise
            # take the middle frame so the timestamp never exceeds the clip.
            frame_time = 1 if duration > 1 else duration / 2
            # A unique file per call avoids concurrent requests overwriting
            # each other's key frame.
            fd, keyframe_path = tempfile.mkstemp(suffix=".jpg")
            os.close(fd)
            try:
                clip.save_frame(keyframe_path, t=frame_time)
            except Exception:
                os.remove(keyframe_path)
                raise
        finally:
            # Release the underlying ffmpeg reader.
            clip.close()
        return keyframe_path