Update app.py
Browse files
app.py
CHANGED
@@ -350,16 +350,14 @@ def get_single_prompt(user_input):
|
|
350 |
f"User input: \"{user_input}\""
|
351 |
)
|
352 |
|
353 |
-
response = openai.
|
354 |
-
|
355 |
-
|
356 |
-
max_tokens=50,
|
357 |
-
n=1,
|
358 |
-
stop=None,
|
359 |
temperature=0.3,
|
|
|
360 |
)
|
361 |
-
generated_prompt = response.choices[0].
|
362 |
-
# Ensure no question marks
|
363 |
generated_prompt = generated_prompt.replace("?", "")
|
364 |
return generated_prompt
|
365 |
|
@@ -379,12 +377,12 @@ def process_question_and_detect(user_input, image):
|
|
379 |
1. Uses OpenAI to generate a single, concise prompt (without question marks) from the user's input.
|
380 |
2. Feeds that prompt to the VisionAgent detection function.
|
381 |
3. Overlays the detection bounding boxes on the image.
|
382 |
-
4. If the user's input
|
383 |
"""
|
384 |
if image is None:
|
385 |
return None, "Please upload an image."
|
386 |
|
387 |
-
# Generate
|
388 |
generated_prompt = get_single_prompt(user_input)
|
389 |
|
390 |
# Run object detection using the generated prompt.
|
@@ -393,7 +391,7 @@ def process_question_and_detect(user_input, image):
|
|
393 |
# Overlay bounding boxes on the image.
|
394 |
viz = T.overlay_bounding_boxes(image, dets)
|
395 |
|
396 |
-
#
|
397 |
count_text = ""
|
398 |
if is_count_query(user_input):
|
399 |
count = len(dets)
|
@@ -402,7 +400,6 @@ def process_question_and_detect(user_input, image):
|
|
402 |
output_text = f"Generated prompt: {generated_prompt}\n{count_text}"
|
403 |
return viz, output_text
|
404 |
|
405 |
-
# Build the Gradio interface.
|
406 |
with gr.Blocks() as demo:
|
407 |
gr.Markdown("# VisionAgent Object Detection and Counting App")
|
408 |
gr.Markdown(
|
@@ -429,6 +426,7 @@ with gr.Blocks() as demo:
|
|
429 |
|
430 |
submit_btn.click(fn=process_question_and_detect, inputs=[user_input, image_input], outputs=[output_image, output_text])
|
431 |
|
432 |
-
demo.launch()
|
|
|
433 |
|
434 |
|
|
|
350 |
f"User input: \"{user_input}\""
|
351 |
)
|
352 |
|
353 |
+
response = openai.ChatCompletion.create(
|
354 |
+
model="gpt-4o", # adjust model name if needed
|
355 |
+
messages=[{"role": "user", "content": prompt_instruction}],
|
|
|
|
|
|
|
356 |
temperature=0.3,
|
357 |
+
max_tokens=50,
|
358 |
)
|
359 |
+
generated_prompt = response.choices[0].message.content.strip()
|
360 |
+
# Ensure no question marks remain.
|
361 |
generated_prompt = generated_prompt.replace("?", "")
|
362 |
return generated_prompt
|
363 |
|
|
|
377 |
1. Uses OpenAI to generate a single, concise prompt (without question marks) from the user's input.
|
378 |
2. Feeds that prompt to the VisionAgent detection function.
|
379 |
3. Overlays the detection bounding boxes on the image.
|
380 |
+
4. If the user's input implies a counting request, it also returns the count of detected objects.
|
381 |
"""
|
382 |
if image is None:
|
383 |
return None, "Please upload an image."
|
384 |
|
385 |
+
# Generate the concise prompt from the user's input.
|
386 |
generated_prompt = get_single_prompt(user_input)
|
387 |
|
388 |
# Run object detection using the generated prompt.
|
|
|
391 |
# Overlay bounding boxes on the image.
|
392 |
viz = T.overlay_bounding_boxes(image, dets)
|
393 |
|
394 |
+
# If the user's input implies a counting request, include the count.
|
395 |
count_text = ""
|
396 |
if is_count_query(user_input):
|
397 |
count = len(dets)
|
|
|
400 |
output_text = f"Generated prompt: {generated_prompt}\n{count_text}"
|
401 |
return viz, output_text
|
402 |
|
|
|
403 |
with gr.Blocks() as demo:
|
404 |
gr.Markdown("# VisionAgent Object Detection and Counting App")
|
405 |
gr.Markdown(
|
|
|
426 |
|
427 |
submit_btn.click(fn=process_question_and_detect, inputs=[user_input, image_input], outputs=[output_image, output_text])
|
428 |
|
429 |
+
demo.launch(share=True)
|
430 |
+
|
431 |
|
432 |
|