Update app.py
app.py
CHANGED
@@ -19,7 +19,7 @@ api_key = os.getenv("OPEN_AI_KEY")
 user_name = os.getenv("USER_NAME")
 password = os.getenv("PASSWORD")
 
-LENGTH = 3
+LENGTH = 3
 WEBCAM = 0
 
 MARKDOWN = """
@@ -32,7 +32,7 @@ AVATARS = (
 
 # Set your OpenAI API key
 openai.api_key = api_key
-MODEL
+MODEL="gpt-4o"
 client = openai.OpenAI(api_key=api_key)
 
 # Global variable to stop the video capture loop
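Note on this hunk: the new `MODEL` constant is introduced, but the call site in `check_condition` below still hard-codes `"gpt-4o"`. A minimal sketch of how the constant would be used with the v1 client (self-contained, assuming only the environment variable from the top of the file); once the client object exists, the legacy `openai.api_key = api_key` assignment above it is redundant:

```python
import os
import openai

api_key = os.getenv("OPEN_AI_KEY")
MODEL = "gpt-4o"
client = openai.OpenAI(api_key=api_key)

# Route every completion call through the constant instead of a literal,
# so the model can be swapped in one place.
response = client.chat.completions.create(
    model=MODEL,
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)
```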
@@ -69,6 +69,7 @@ def clip_video_segment(input_video_path, start_time, duration):
     return output_video_path
 
 def encode_to_video_fast(frames, fps):
+
     os.makedirs('videos', exist_ok=True)
     video_clip_path = f"videos/{uuid.uuid4()}.mp4"
 
@@ -87,6 +88,7 @@ def encode_to_video_fast(frames, fps):
 
     return video_clip_path
 
+
 def encode_to_video(frames, fps):
     os.makedirs('videos', exist_ok=True)
     video_clip_path = f"videos/{uuid.uuid4()}.mp4"
@@ -102,9 +104,9 @@ def encode_to_video(frames, fps):
     return video_clip_path
 
 # Function to process video frames using GPT-4 API
-def process_frames(frames, frames_to_skip=1):
+def process_frames(frames, frames_to_skip = 1):
     os.makedirs('saved_frames', exist_ok=True)
-    curr_frame
+    curr_frame=0
     base64Frames = []
     while curr_frame < len(frames) - 1:
         _, buffer = cv2.imencode(".jpg", frames[curr_frame])
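The hunk cuts off right after the first line of the sampling loop. For context, a plausible completion (the `curr_frame += frames_to_skip` step and the return value are assumptions, inferred from how `check_condition` consumes `base64Frames`): with `frames_to_skip = int(fps)`, this keeps roughly one frame per second of video.

```python
import base64
import cv2

def process_frames(frames, frames_to_skip=1):
    # Keep every `frames_to_skip`-th frame, JPEG-encode it, and base64 it
    # for the chat API; frames_to_skip = int(fps) ~ one frame per second.
    curr_frame = 0
    base64Frames = []
    while curr_frame < len(frames) - 1:
        _, buffer = cv2.imencode(".jpg", frames[curr_frame])
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
        curr_frame += frames_to_skip
    return base64Frames
```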
@@ -117,6 +119,8 @@ def check_condition(prompt, base64Frames):
     start_time = time.time()
     print('checking condition for frames:', len(base64Frames))
 
+    # Save frames as images
+
     try:
         messages = [
             {"role": "system", "content": """You are analyzing video to check if the user's condition is met.
@@ -129,14 +133,14 @@ def check_condition(prompt, base64Frames):
             model="gpt-4o",
             messages=messages,
             temperature=0,
-            response_format={"type": "json_object"}
+            response_format={ "type": "json_object" }
         )
 
         end_time = time.time()
         processing_time = end_time - start_time
         frames_count = len(base64Frames)
         api_response = response.choices[0].message.content
-    except
+    except Exception as e:
         print('error from openai', e)
         return 0, 0, {"condition_met": False}
 
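`response_format={"type": "json_object"}` only guarantees syntactically valid JSON; `message.content` still arrives as a string, so it has to be parsed before anything can call `.get("condition_met")` on it (the later hunks do this inside a try/except). A sketch of the parse step, with a hypothetical fallback:

```python
import json

raw = response.choices[0].message.content
try:
    api_response = json.loads(raw)  # JSON mode => this should not raise
except json.JSONDecodeError:
    api_response = {"condition_met": False}

if api_response.get("condition_met", False):
    print(api_response.get("details", ""))
```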
@@ -147,6 +151,7 @@ def check_condition(prompt, base64Frames):
     except:
         print('result', response.usage.total_tokens, api_response)
         return frames_count, processing_time, api_response
+
 
 # Function to process video clip and update the chatbot
 def process_clip(prompt, frames, chatbot):
@@ -178,23 +183,19 @@ def process_clip_from_file(prompt, frames, chatbot, fps, video_path, id):
     global stop_capture
     if not stop_capture:
         israel_tz = pytz.timezone('Asia/Jerusalem')
-
-        print("[Start]:",
+        start_time = datetime.now(israel_tz).strftime('%H:%M:%S')
+        print("[Start]:", start_time, len(frames))
 
         frames_to_skip = int(fps)
         base64Frames = process_frames(frames, frames_to_skip)
         frames_count, processing_time, api_response = check_condition(prompt, base64Frames)
 
+        result = None
         if api_response and api_response.get("condition_met", False):
-            #
-
-
-
-            # Format times
-            start_time_formatted = time.strftime('%H:%M:%S', time.gmtime(start_time))
-            end_time_formatted = time.strftime('%H:%M:%S', time.gmtime(end_time))
-
-            chatbot.append((f"Event ID: {id+1}\nStart Time: {start_time_formatted}\nEnd Time: {end_time_formatted}\nDetails: {api_response.get('details', '')}", None))
+            # video_clip_path = encode_to_video_fast(frames, fps)
+            video_clip_path = clip_video_segment_2(video_path, id*LENGTH, LENGTH)
+            chatbot.append(((video_clip_path,), None))
+            chatbot.append((f"Event ID: {id+1}\nDetails: {api_response.get('details', '')}", None))
 
     return chatbot
 
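`clip_video_segment_2` is called here but not defined in this diff; judging from the sibling `clip_video_segment(input_video_path, start_time, duration)` named in an earlier hunk header, it presumably cuts the `LENGTH`-second window starting at `id*LENGTH` seconds out of the source file. A hypothetical ffmpeg-based implementation with that signature:

```python
import os
import subprocess
import uuid

def clip_video_segment_2(input_video_path, start_time, duration):
    # Hypothetical stand-in for the real helper defined elsewhere in app.py:
    # stream-copy `duration` seconds starting at `start_time` (seeks to the
    # nearest keyframe, so clip boundaries are approximate).
    os.makedirs('videos', exist_ok=True)
    output_video_path = f"videos/{uuid.uuid4()}.mp4"
    subprocess.run(
        ['ffmpeg', '-y', '-loglevel', 'error',
         '-ss', str(start_time), '-i', input_video_path,
         '-t', str(duration), '-c', 'copy', output_video_path],
        check=True,
    )
    return output_video_path
```

Also worth noting: the added `result = None` in this hunk is never read before `return chatbot`.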
@@ -202,23 +203,20 @@ def process_clip_from_file(prompt, frames, chatbot, fps, video_path, id):
 def process_clip_from_file_sync(prompt, frames, fps, video_path, id):
     global stop_capture
     if not stop_capture:
-
-        start_time =
-
-
-        # Format times as HH:MM:SS
-        start_time_formatted = time.strftime('%H:%M:%S', time.gmtime(start_time))
-        end_time_formatted = time.strftime('%H:%M:%S', time.gmtime(end_time))
-
+        israel_tz = pytz.timezone('Asia/Jerusalem')
+        start_time = datetime.now(israel_tz).strftime('%H:%M:%S')
+        print("[Start]:", start_time, len(frames))
+
         frames_to_skip = int(fps)
         base64Frames = process_frames(frames, frames_to_skip)
         frames_count, processing_time, api_response = check_condition(prompt, base64Frames)
 
         if api_response and api_response.get("condition_met", False):
+            video_clip_path = clip_video_segment_2(video_path, id*LENGTH, LENGTH)
             event = {
                 'event_id': id + 1,
-                '
-                '
+                'video_clip_path': video_clip_path,
+                'start_time': start_time,
                 'details': api_response.get('details', '')
             }
             return event
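The sync variant returns a plain dict instead of appending to the chatbot; for reference, a payload of the shape assembled here (all values illustrative):

```python
event = {
    'event_id': 4,
    'video_clip_path': 'videos/3f2b8c9e-1d4a-4f6b-9a7c-2e5d8f0a1b3c.mp4',
    'start_time': '14:03:07',
    'details': 'Person entered the frame from the left.',
}
```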
@@ -229,6 +227,7 @@ def analyze_stream(prompt, stream, chatbot):
     global stop_capture
     stop_capture = False
 
+
     cap = cv2.VideoCapture(stream or WEBCAM)
 
     frames = []
@@ -258,7 +257,7 @@ def analyze_video_file(prompt, video_path, chatbot):
 
     # Get video properties
     fps = int(cap.get(cv2.CAP_PROP_FPS)) # Frames per second
-    frames_per_chunk =
+    frames_per_chunk = fps * LENGTH # Number of frames per LENGTH-second chunk
 
     frames = []
     chunk = 0
@@ -277,12 +276,12 @@ def analyze_video_file(prompt, video_path, chatbot):
             if len(frames) >= frames_per_chunk:
                 futures.append(executor.submit(process_clip_from_file, prompt, frames.copy(), chatbot, fps, video_path, chunk))
                 frames = []
-                chunk
+                chunk+=1
 
         # If any remaining frames that are less than LENGTH seconds, process them as a final chunk
         if len(frames) > 0:
             futures.append(executor.submit(process_clip_from_file, prompt, frames.copy(), chatbot, fps, video_path, chunk))
-            chunk
+            chunk+=1
 
     cap.release()
     # Yield results as soon as each thread completes
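A quick sanity check on the chunk arithmetic in this hunk and its mirrored `*_sync` counterparts below (numbers assumed for illustration): with `fps = 30` and `LENGTH = 3`, each chunk holds 90 frames, so a 10-second file produces three full chunks plus a 30-frame tail that the `if len(frames) > 0` block submits as a final, shorter chunk.

```python
fps, LENGTH = 30, 3              # illustrative values
frames_per_chunk = fps * LENGTH  # 90 frames per 3-second chunk
total_frames = 10 * fps          # a 10-second file
full_chunks, tail = divmod(total_frames, frames_per_chunk)
print(full_chunks, tail)         # -> 3 full chunks, 30-frame tail
```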
@@ -300,7 +299,7 @@ def analyze_video_file_sync(prompt, video_path):
 
     # Get video properties
     fps = int(cap.get(cv2.CAP_PROP_FPS)) # Frames per second
-    frames_per_chunk =
+    frames_per_chunk = fps * LENGTH # Number of frames per LENGTH-second chunk
 
     frames = []
     chunk = 0
@@ -320,12 +319,12 @@ def analyze_video_file_sync(prompt, video_path):
             if len(frames) >= frames_per_chunk:
                 futures.append(executor.submit(process_clip_from_file_sync, prompt, frames.copy(), fps, video_path, chunk))
                 frames = []
-                chunk
+                chunk+=1
 
         # If any remaining frames that are less than LENGTH seconds, process them as a final chunk
         if len(frames) > 0:
             futures.append(executor.submit(process_clip_from_file_sync, prompt, frames.copy(), fps, video_path, chunk))
-            chunk
+            chunk+=1
 
     cap.release()
     # Collect results as threads complete
@@ -365,10 +364,10 @@ with gr.Blocks(title="Conntour", fill_height=True) as demo:
     # Add new API endpoint (without UI components)
     with gr.Row(visible=False) as hidden_api:
         api_prompt = gr.Textbox(label="Prompt")
-        api_video = gr.
+        api_video = gr.Textbox(label="Video Path")
         api_output = gr.JSON(label="Captured Events")
         api_btn = gr.Button("Analyze Video File")
 
         api_btn.click(analyze_video_file_sync, inputs=[api_prompt, api_video], outputs=[api_output])
 
-demo.launch(favicon_path='favicon.ico', auth=(user_name, password))
+demo.launch(favicon_path='favicon.ico', auth=(user_name, password))
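Since the hidden `gr.Row` registers a real endpoint via `api_btn.click(...)`, the Space can be driven programmatically. A sketch using `gradio_client` (the Space id, credentials, and `api_name` are placeholders; `client.view_api()` lists the actual endpoint name):

```python
from gradio_client import Client

# Placeholder Space id and credentials.
client = Client("owner/space-name", auth=("<USER_NAME>", "<PASSWORD>"))
events = client.predict(
    "a person enters the frame",  # api_prompt
    "/path/to/video.mp4",         # api_video -- a plain Textbox in this diff
    api_name="/predict",          # placeholder; confirm with client.view_api()
)
print(events)
```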