Reacher committed on
Commit 750920d · 1 parent: f2a192e

added video inference

Files changed (3)
  1. .gitignore +2 -1
  2. app.py +24 -28
  3. predict.py +48 -0
.gitignore CHANGED
@@ -1,5 +1,6 @@
 flagged/
+__pycache__/
 *.png
-*.mp4
 *.mkv
+*.mp4
 gradio_cached_examples/
app.py CHANGED
@@ -1,32 +1,11 @@
 import gradio as gr
-import cv2
 import requests
-from ultralytics import YOLO
+from predict import image_inference, video_inference
+
 
-model = YOLO('best.pt')
-path = [['image.jpg'],]
-classes = ['ain', 'al', 'aleff','bb','dal','dha','dhad','fa','gaaf','ghain','ha','haa','jeem','kaaf','khaa','la','laam',
-           'meem','nun','ra','saad','seen','sheen','ta','taa','thaa','thal','toot','waw','ya','yaa','zay']
-TargetMapper = dict(zip(range(32), classes))
-def show_preds_image(image_path):
-    print(image_path)
-    image = cv2.imread(image_path)
-    outputs = model.predict(source=image_path)
-    results = outputs[0]
-    for i, det in enumerate(results.boxes.xyxy):
-        cls = TargetMapper[results.boxes.cls.numpy()[i]]
-        #det = results.boxes.xyxy[0]
-        cv2.rectangle(
-            image,
-            (int(det[0]), int(det[1])),
-            (int(det[2]), int(det[3])),
-            color=(0, 0, 255),
-            thickness=2,
-            lineType=cv2.LINE_AA
-        )
-        cv2.putText(image, cls, (int(det[0]), int(det[1])-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
 
-    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+path = [['image.jpg'],]
+video_path = [['video_.mp4']]
 
 #image = cv2.imwrite('output.jpg', show_preds_image(path))
 inputs_image = [
@@ -35,11 +14,28 @@ inputs_image = [
 outputs_image = [
     gr.components.Image(type="numpy", label="Output Image"),
 ]
-gr.Interface(
-    fn=show_preds_image,
+image_interface = gr.Interface(
+    fn=image_inference,
     inputs=inputs_image,
     outputs=outputs_image,
     title="Arab Sign Language Detection app",
     examples=path,
     cache_examples=False,
-).launch()
+)#.launch(share=True)
+inputs_video = [
+    gr.components.Video(type='filepath', label='Input Video'),
+]
+outputs_video = [
+    gr.components.Image(type='numpy', label='Output Video')
+]
+interface_video = gr.Interface(
+    fn=video_inference,
+    inputs=inputs_video,
+    outputs=outputs_video,
+    title="Arab Sign Language Detection app",
+    examples=video_path
+)
+gr.TabbedInterface(
+    [image_interface, interface_video],
+    tab_names=['Image inference', 'Video inference']
+).queue().launch()
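
Note on the new layout: the video tab pairs a generator function with an Image output. Because `video_inference` yields one annotated frame at a time, Gradio streams each yield to the "Output Video" image as it arrives, which is why the app now calls `.queue()` before `.launch()`. A minimal self-contained sketch of that streaming pattern (assuming Gradio 3.x generator streaming; the dummy frame source and all names below are illustrative, not part of this repo):

import time
import numpy as np
import gradio as gr

def fake_video_inference(video_path):
    # Yield successive RGB frames; with queue() enabled, Gradio pushes
    # each yielded array to the Image output as a streaming update.
    for step in range(5):
        frame = np.zeros((240, 320, 3), dtype=np.uint8)
        frame[:, :(step + 1) * 64] = (0, 255, 0)  # growing green bar
        time.sleep(0.1)  # stand-in for per-frame model latency
        yield frame

gr.Interface(
    fn=fake_video_inference,
    inputs=gr.components.Video(label="Input Video"),
    outputs=gr.components.Image(type="numpy", label="Output Video"),
).queue().launch()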
predict.py ADDED
@@ -0,0 +1,48 @@
+import cv2
+from ultralytics import YOLO
+
+
+classes = ['ain', 'al', 'aleff','bb','dal','dha','dhad','fa','gaaf','ghain','ha','haa','jeem','kaaf','khaa','la','laam',
+           'meem','nun','ra','saad','seen','sheen','ta','taa','thaa','thal','toot','waw','ya','yaa','zay']
+TargetMapper = dict(zip(range(32), classes))
+model = YOLO('best.pt')
+def image_inference(image_path):
+    print(image_path)
+    image = cv2.imread(image_path)
+    outputs = model.predict(source=image_path)
+    results = outputs[0]
+    for i, det in enumerate(results.boxes.xyxy):
+        cls = TargetMapper[results.boxes.cls.numpy()[i]]
+        #det = results.boxes.xyxy[0]
+        cv2.rectangle(
+            image,
+            (int(det[0]), int(det[1])),
+            (int(det[2]), int(det[3])),
+            color=(0, 0, 255),
+            thickness=2,
+            lineType=cv2.LINE_AA
+        )
+        cv2.putText(image, cls, (int(det[0]), int(det[1])-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
+
+    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+def video_inference(video_path):
+    cap = cv2.VideoCapture(video_path)
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if ret:
+            frame_copy = frame.copy()
+            outputs = model.predict(source=frame)
+            results = outputs[0]#.cpu().numpy()
+            for i, det in enumerate(results.boxes.xyxy):
+                cls = TargetMapper[results.boxes.cls.numpy()[i]]
+                cv2.rectangle(
+                    frame_copy,
+                    (int(det[0]), int(det[1])),
+                    (int(det[2]), int(det[3])),
+                    color=(0, 0, 255),
+                    thickness=2,
+                    lineType=cv2.LINE_AA
+                )
+                cv2.putText(frame_copy, cls, (int(det[0]), int(det[1])-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
+            yield cv2.cvtColor(frame_copy, cv2.COLOR_BGR2RGB)
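
One caveat in `video_inference` as committed: once the clip is exhausted, `cap.read()` returns `ret == False`, but `cap.isOpened()` remains true, so the `while` loop spins forever after the last frame and the capture is never released. A sketch of a read loop that terminates cleanly (the model call and box drawing are elided; `annotated_frames` is a hypothetical name, not in this commit):

import cv2

def annotated_frames(video_path):
    # Same role as video_inference, but stops at end-of-stream and
    # releases the decoder even if the consumer abandons the generator.
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:  # end of stream or read error: stop iterating
                break
            # ... run model.predict(source=frame) and draw boxes here ...
            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        cap.release()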