sitammeur committed · verified
Commit a443a51 · Parent(s): dbba62e

Update app.py

Files changed (1)
  1. app.py +64 -70
app.py CHANGED
@@ -1,70 +1,64 @@
-# Importing the requirements
-import warnings
-warnings.filterwarnings("ignore")
-
-import gradio as gr
-from src.minicpm.response import describe_image
-
-
-# Image, text query, and input parameters
-image = gr.Image(type="pil", label="Image")
-question = gr.Textbox(label="Question", placeholder="Enter your question here")
-temperature = gr.Slider(
-    minimum=0.01, maximum=1.99, step=0.01, value=0.7, label="Temperature"
-)
-top_p = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label="Top P")
-top_k = gr.Slider(minimum=0, maximum=1000, step=1, value=100, label="Top K")
-max_new_tokens = gr.Slider(minimum=1, maximum=4096, step=1, value=512, label="Max Tokens")
-
-# Output for the interface
-answer = gr.Textbox(label="Predicted answer", show_label=True, show_copy_button=True)
-
-# Examples for the interface
-examples = [
-    [
-        "images/cat.jpg",
-        "How many cats are there?",
-        0.7,
-        0.8,
-        100,
-        512,
-    ],
-    [
-        "images/dog.jpg",
-        "¿De qué color es el perro?",
-        0.7,
-        0.8,
-        100,
-        512,
-    ],
-    [
-        "images/bird.jpg",
-        "Que fait l'oiseau ?",
-        0.7,
-        0.8,
-        100,
-        512,
-    ],
-]
-
-# Title, description, and article for the interface
-title = "Visual Question Answering"
-description = "Gradio Demo for the MiniCPM-o 2.6: A GPT-4o Level MLLM for Vision, Speech and Multimodal Live Streaming. This model can answer questions about images in natural language. To use it, upload your image, type a question, select associated parameters, use the default values, click 'Submit', or click one of the examples to load them. You can read more at the links below."
-article = "<p style='text-align: center'><a href='https://github.com/OpenBMB/MiniCPM-o' target='_blank'>Model GitHub Repo</a> | <a href='https://huggingface.co/openbmb/MiniCPM-o-2_6' target='_blank'>Model Page</a></p>"
-
-
-# Launch the interface
-interface = gr.Interface(
-    fn=describe_image,
-    inputs=[image, question, temperature, top_p, top_k, max_new_tokens],
-    outputs=answer,
-    examples=examples,
-    cache_examples=True,
-    cache_mode="lazy",
-    title=title,
-    description=description,
-    article=article,
-    theme="Glass",
-    flagging_mode="never",
-)
-interface.launch(debug=False)
+# Importing the requirements
+# import warnings
+# warnings.filterwarnings("ignore")
+
+import gradio as gr
+from src.yolo.predict_pose import predict_pose
+
+
+# Image and input parameters
+image = gr.Image(type="pil", label="Image")
+confidence_threshold = gr.Slider(
+    minimum=0, maximum=1, step=0.01, value=0.25, label="Confidence threshold"
+)
+iou_threshold = gr.Slider(
+    minimum=0, maximum=1, step=0.01, value=0.45, label="IoU threshold"
+)
+max_detections = gr.Slider(
+    minimum=1, maximum=300, step=1, value=300, label="Max detections"
+)
+model_name = gr.Radio(
+    choices=[
+        "yolo11n-pose.pt",
+        "yolo11s-pose.pt",
+        "yolo11m-pose.pt",
+        "yolo11l-pose.pt",
+        "yolo11x-pose.pt",
+    ],
+    label="Model name",
+    value="yolo11n-pose.pt",
+)
+
+# Output image
+pose_image = gr.Image(type="pil", label="Output Image")
+
+# Examples for the interface
+examples = [
+    ["images/posing-sample-image3.jpg", 0.25, 0.45, 300, "yolo11n-pose.pt"],
+    ["images/posing-sample-image4.jpg", 0.25, 0.45, 300, "yolo11s-pose.pt"],
+    ["images/posing-sample-image5.jpg", 0.25, 0.45, 300, "yolo11m-pose.pt"],
+    ["images/posing-sample-image1.jpg", 0.25, 0.45, 300, "yolo11l-pose.pt"],
+    ["images/posing-sample-image2.png", 0.25, 0.45, 300, "yolo11x-pose.pt"],
+]
+
+# Title, description, and article for the interface
+title = "YOLO11 Pose Estimation"
+description = "Gradio Demo for the YOLO11 Pose Estimation model. This model can detect and predict the poses of people in images. To use it, upload your image, select associated parameters, or use the default values, click 'Submit', or click one of the examples to load them. You can read more at the links below."
+article = "<p style='text-align: center'><a href='https://github.com/ultralytics/ultralytics' target='_blank'>Ultralytics GitHub</a> | <a href='https://docs.ultralytics.com/models/yolo11/' target='_blank'>Model Page</a></p>"
+
+
+# Launch the interface
+interface = gr.Interface(
+    fn=predict_pose,
+    inputs=[image, confidence_threshold, iou_threshold, max_detections, model_name],
+    outputs=pose_image,
+    examples=examples,
+    cache_examples=True,
+    cache_mode="lazy",
+    title=title,
+    description=description,
+    article=article,
+    theme="Base",
+    flagging_mode="never",
+)
+interface.launch(debug=False)
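The new app imports predict_pose from src/yolo/predict_pose.py, which is not part of this commit. Below is a minimal sketch of what such a function could look like using the standard Ultralytics API; the function body, docstring, and parameter names here are assumptions inferred from the inputs list above, not the repository's actual implementation.

# Hypothetical sketch of src/yolo/predict_pose.py -- not included in this commit.
# Assumes the ultralytics package; the argument order mirrors the Gradio `inputs` list.
from PIL import Image
from ultralytics import YOLO


def predict_pose(image, confidence_threshold, iou_threshold, max_detections, model_name):
    """Run YOLO11 pose estimation and return the annotated image as a PIL Image."""
    model = YOLO(model_name)  # downloads the checkpoint on first use
    results = model.predict(
        image,
        conf=confidence_threshold,
        iou=iou_threshold,
        max_det=max_detections,
    )
    # plot() returns an annotated BGR numpy array; reverse the channel axis for RGB
    annotated = results[0].plot()[:, :, ::-1]
    return Image.fromarray(annotated)

With a definition along these lines, each row in the examples list maps positionally onto the function's parameters, and cache_examples=True with cache_mode="lazy" means an example is only run through predict_pose the first time a visitor clicks it.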