luthfi507 committed on
Commit c155a45 · verified · 1 Parent(s): 3763c78

Delete app.py

Files changed (1)
  1. app.py +0 -123
app.py DELETED
@@ -1,123 +0,0 @@
- import cv2
- import numpy as np
- import torch
- from torch import nn
- from torchvision import transforms, models
- import gradio as gr
- from huggingface_hub import hf_hub_download
-
- class BISINDOClassifier(nn.Module):
-     def __init__(self, hidden_dim, num_classes, num_layers=1):
-         super().__init__()
-         self.hidden_dim = hidden_dim
-         self.num_classes = num_classes
-         self.num_layers = num_layers
-
-         # Pre-trained VGG-19 backbone; emptying the classifier head keeps only
-         # the 512x7x7 convolutional feature maps.
-         self.vgg19 = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)
-         self.vgg19.classifier = nn.Sequential()
-
-         # GRU over the flattened per-frame features
-         self.gru = nn.GRU(input_size=512*7*7, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
-
-         self.fc = nn.Linear(hidden_dim, num_classes)
-
-     def forward(self, x):
-         # x: (batch, sequence, channels, height, width)
-         batch_size, seq_length, c, h, w = x.size()
-         x = x.view(batch_size * seq_length, c, h, w)
-
-         # The backbone is used as a frozen feature extractor
-         with torch.no_grad():
-             x = self.vgg19.features(x)
-
-         # Regroup per-frame features into sequences: (batch, sequence, 512*7*7)
-         x = x.view(batch_size, seq_length, -1)
-
-         h0 = torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(x.device)
-         x, _ = self.gru(x, h0)
-
-         # Classify from the final time step's hidden state
-         x = self.fc(x[:, -1, :])
-
-         return x
-
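- # Illustrative shape check (not part of the original app): a random
- # (1, 2, 3, 224, 224) clip should yield logits of shape (1, 40).
- # _m = BISINDOClassifier(hidden_dim=512, num_classes=40)
- # assert _m(torch.randn(1, 2, 3, 224, 224)).shape == (1, 40)
-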
- # Standard ImageNet preprocessing; Normalize assumes RGB channel order.
- transform = transforms.Compose([
-     transforms.ToPILImage(),
-     transforms.Resize((224, 224)),
-     transforms.ToTensor(),
-     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
- ])
-
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- model = BISINDOClassifier(hidden_dim=512, num_classes=40, num_layers=1).to(device)
- # nn.Module has no from_pretrained(), so fetch the checkpoint from the Hub and
- # load the state dict explicitly (the filename 'model.pth' is an assumption).
- weights_path = hf_hub_download(repo_id='luthfi507/bisindo-model', filename='model.pth')
- model.load_state_dict(torch.load(weights_path, map_location=device))
-
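- # Alternative (assumption): if the repo was pushed with huggingface_hub's
- # PyTorchModelHubMixin, the class could inherit that mixin and load via
- # BISINDOClassifier.from_pretrained('luthfi507/bisindo-model') instead.
-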
- # 40 labels; the sorted order must match the class indices used at training time.
- classes = {'Bagaimana', 'Berapa', 'Baik', 'Selamat Malam', 'Halo', 'Pendek', 'Tidur', 'Selamat Sore', 'Membaca', 'Senang', 'Kemana', 'Dia', 'Apa Kabar', 'Saya', 'Apa', 'Kita', 'Sabar', 'Selamat Siang', 'Kalian', 'Dimana', 'Duduk', 'Kapan', 'Mereka', 'Kamu', 'Ramah', 'Makan', 'Tinggi', 'Marah', 'Berdiri', 'Melihat', 'Minum', 'Siapa', 'Selamat Pagi', 'Kami', 'Mandi', 'Menulis', 'Terima Kasih', 'Sedih', 'Bingung', 'Belajar'}
- classes = sorted(classes)
-
- def extract_frames(video_path, sequence_length, interval=10, img_size=(224, 224)):
-     cap = cv2.VideoCapture(video_path)
-
-     if not cap.isOpened():
-         print(f"Error opening video file {video_path}")
-         return []
-
-     frames = []
-     frame_count = 0
-
-     # Sample every `interval`-th frame until `sequence_length` frames are collected.
-     while len(frames) < sequence_length:
-         ret, frame = cap.read()
-         if not ret:
-             break
-         if frame_count % interval == 0:
-             frame = cv2.resize(frame, img_size)
-             # OpenCV decodes to BGR; convert to RGB for the ImageNet-normalized transform.
-             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-             frames.append(frame)
-         frame_count += 1
-
-     cap.release()
-
-     # Pad short videos by repeating the last frame (or black frames if none were read).
-     while len(frames) < sequence_length:
-         if frames:
-             frames.append(frames[-1])
-         else:
-             frames.append(np.zeros((img_size[1], img_size[0], 3), dtype=np.uint8))
-
-     return frames
-
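- # With the defaults (interval=10, sequence_length=10), sampling spans roughly
- # the first 100 decoded frames, i.e. about 3-4 seconds of a 25-30 fps video.
-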
- def preprocess_frames(frames):
-     frames = [transform(frame) for frame in frames]
-     frames = torch.stack(frames)
-     # Add a batch dimension: (1, sequence, channels, height, width)
-     return frames.unsqueeze(0)
-
- def predict(video_path, sequence_length):
-     model.eval()
-
-     frames = extract_frames(video_path, sequence_length)
-     # extract_frames returns [] when the video cannot be opened; bail out early
-     # rather than letting torch.stack fail on an empty list.
-     if not frames:
-         return {"label": None, "confidence": 0.0}
-
-     input_tensor = preprocess_frames(frames)
-     input_tensor = input_tensor.to(device)
-
-     with torch.no_grad():
-         output = model(input_tensor)
-         probabilities = nn.functional.softmax(output, dim=1)
-         confidence, predicted = torch.max(probabilities, 1)
-
-     confidence_score = confidence.item()
-     predicted_label = classes[predicted.item()]
-
-     return {"label": predicted_label, "confidence": confidence_score}
-
- with gr.Blocks() as demo:
-     with gr.Row():
-         input_video = gr.Video(label="Input")
-         output_json = gr.JSON(label="Output")
-     process_video_btn = gr.Button("Process Video")
-
-     def process_video(video):
-         result = predict(video, sequence_length=10)
-         result["confidence"] = f"{result['confidence']:.2f}"
-         return result
-
-     process_video_btn.click(process_video, input_video, output_json)
-
- demo.launch()
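
For reference, the prediction path this file exposed could be exercised without the UI roughly as follows ('clip.mp4' is a placeholder path, not a file from this repo):

    result = predict('clip.mp4', sequence_length=10)
    print(result['label'], result['confidence'])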