Spaces:
Sleeping
Sleeping
small change
Browse files
app.py
CHANGED
@@ -1,203 +1,205 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import torch
|
3 |
-
import joblib
|
4 |
-
import pandas as pd
|
5 |
-
import numpy as np
|
6 |
-
from scipy.spatial.distance import cosine
|
7 |
-
from pathlib import Path
|
8 |
-
import os
|
9 |
-
|
10 |
-
# Define the model architecture (same as in training)
|
11 |
-
class AESModel(torch.nn.Module):
    """Feed-forward regressor that maps a feature vector to an essay score.

    The network is a stack of ``Linear -> BatchNorm1d -> ReLU -> Dropout``
    blocks followed by a one-unit ``Linear -> Sigmoid`` head. The head output
    is multiplied by 4, so predictions lie between 0 and 4.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of every hidden block.
        num_layers: Number of hidden blocks, counting the input block
            (``num_layers - 1`` further blocks follow it).
        dropout: Dropout probability used in every block.
    """

    def __init__(self, input_dim, hidden_dim=512, num_layers=3, dropout=0.4):
        super(AESModel, self).__init__()
        nn = torch.nn

        def dense_block(in_features):
            # One Linear/BatchNorm/ReLU/Dropout unit; only the input width varies.
            return nn.Sequential(
                nn.Linear(in_features, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
            )

        # Attribute names and creation order are kept as-is so the saved
        # weights loaded elsewhere in this file still match the parameters.
        self.input_layer = dense_block(input_dim)
        self.hidden_layers = nn.ModuleList(
            [dense_block(hidden_dim) for _ in range(num_layers - 1)]
        )
        self.output_layer = nn.Sequential(
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run ``x`` through all blocks and return the sigmoid output times 4."""
        hidden = self.input_layer(x)
        for block in self.hidden_layers:
            hidden = block(hidden)
        return self.output_layer(hidden) * 4
|
38 |
-
|
39 |
-
# Load the saved model and components
|
40 |
-
def load_components(model_dir='./final'):
    """Load the trained model and its supporting artifacts from ``model_dir``.

    Reads ``final_aes_model.pt``, ``preprocessor.pkl``, ``jawaban_essay.pkl``
    and ``training_config.pkl``, rebuilds ``AESModel`` from the stored
    architecture settings, loads the saved weights and puts the model in
    evaluation mode on the CPU.

    Args:
        model_dir: Directory that contains the four artifact files.

    Returns:
        ``(model, preprocessor, jawaban_essay, device)`` on success, or
        ``(None, None, None, None)`` when anything fails. The failure is
        printed instead of raised so the caller can keep running without a
        model.
    """
    try:
        artifact_dir = Path(model_dir)
        artifact_names = (
            'final_aes_model.pt',
            'preprocessor.pkl',
            'jawaban_essay.pkl',
            'training_config.pkl',
        )
        # Report every missing artifact at once rather than one per restart.
        missing = [name for name in artifact_names if not (artifact_dir / name).is_file()]
        if missing:
            raise FileNotFoundError(
                f"missing files in {artifact_dir}: {', '.join(missing)}"
            )

        # Use CPU for Hugging Face Spaces deployment
        device = torch.device('cpu')

        # SECURITY: every loader below unpickles arbitrary Python objects
        # (torch.load with weights_only=False, joblib.load, pd.read_pickle).
        # Only point model_dir at artifacts produced by a trusted training run.
        model_data = torch.load(
            artifact_dir / 'final_aes_model.pt',
            map_location=device,
            weights_only=False,
        )
        preprocessor = joblib.load(artifact_dir / 'preprocessor.pkl')
        jawaban_essay = pd.read_pickle(artifact_dir / 'jawaban_essay.pkl')
        training_config = joblib.load(artifact_dir / 'training_config.pkl')

        architecture = training_config['model_architecture']
        model = AESModel(
            input_dim=architecture['input_dim'],
            hidden_dim=architecture['hidden_dim'],
            num_layers=architecture['num_layers'],
            dropout=architecture['dropout'],
        )
        model.load_state_dict(model_data['model_state_dict'])
        model.to(device)
        model.eval()

        return model, preprocessor, jawaban_essay, device
    except Exception as e:
        # Deliberate best-effort: the caller checks for None and degrades.
        print(f"Error loading model components: {e}")
        return None, None, None, None
|
64 |
-
|
65 |
-
# Prediction function that returns multiple outputs
|
66 |
-
def predict_score(question_code, student_answer, model, preprocessor, jawaban_essay, device):
    """Score one student answer against the answer key of a question.

    Args:
        question_code: Question identifier; anything ``int()`` accepts.
        student_answer: Raw answer text typed by the student.
        model: Callable model that takes a ``(1, n_features)`` float tensor.
        preprocessor: Object providing ``preprocess_text``,
            ``extract_linguistic_features`` and ``get_sbert_embedding``.
        jawaban_essay: DataFrame with the columns ``Kode``, ``Pertanyaan``,
            ``Jawaban`` and ``processed_kunci_jawaban``.
        device: Torch device the input tensor is moved to.

    Returns:
        A 4-tuple of display strings ``(score, question, answer key,
        similarity)``. On failure the first element is an ``"Error: ..."``
        message and the other three are empty strings.
    """
    no_details = ("", "", "")

    # Validate the cheap inputs up front so the user sees a clear message
    # instead of raw exception text such as "invalid literal for int()".
    try:
        question_code = int(question_code)
    except (TypeError, ValueError):
        return ("Error: Invalid question code",) + no_details
    if student_answer is None or not str(student_answer).strip():
        return ("Error: Student answer is empty",) + no_details

    try:
        # Get the answer key for this question
        kunci_jawaban = jawaban_essay[jawaban_essay['Kode'] == question_code]
        if kunci_jawaban.empty:
            return ("Error: Question code not found",) + no_details

        question_text = kunci_jawaban['Pertanyaan'].iloc[0]
        key_answer = kunci_jawaban['Jawaban'].iloc[0]
        processed_key = kunci_jawaban['processed_kunci_jawaban'].iloc[0]

        # Extract features
        processed_text = preprocessor.preprocess_text(student_answer)
        linguistic_features = preprocessor.extract_linguistic_features(processed_text)
        embedding = preprocessor.get_sbert_embedding(processed_text)
        embedding_kunci = preprocessor.get_sbert_embedding(processed_key)
        similarity = 1 - cosine(embedding, embedding_kunci)

        # Feature order is embedding values, then linguistic features in
        # their dict order, then the similarity.
        # NOTE(review): presumably this must match the training-time column
        # order -- confirm against the training code.
        ordered_values = [*embedding, *linguistic_features.values(), similarity]
        feature_values = np.array(ordered_values).astype(np.float32).reshape(1, -1)

        # Make prediction
        with torch.no_grad():
            input_tensor = torch.from_numpy(feature_values).to(device)
            prediction = model(input_tensor).item()

        # Clamp to the 0-4 score scale before rounding for display.
        prediction = round(min(max(prediction, 0), 4), 2)

        return (f"Predicted Score: {prediction:.2f}/4.00",
                f"Pertanyaan: {question_text}",
                f"Kunci Jawaban: {key_answer}",
                f"Similarity dengan Kunci Jawaban: {similarity:.3f}")
    except Exception as e:
        # Boundary handler: the UI shows the message rather than crashing.
        return (f"Error: {str(e)}",) + no_details
|
106 |
-
|
107 |
-
# Load components once at startup
|
108 |
-
# Load everything once at import time so each request reuses the same objects.
print("Loading model components...")
model, preprocessor, jawaban_essay, device = load_components()

# load_components signals failure by returning None for every component.
if model is None:
    print("Failed to load model components. Running in demo mode.")
else:
    print("Model components loaded successfully!")
|
115 |
-
|
116 |
-
# Create Gradio interface with multiple outputs
|
117 |
-
def gradio_predict(question_code, student_answer):
    """Gradio callback that scores an answer with the module-level components.

    Returns the 4-tuple produced by ``predict_score``, or a fixed error tuple
    when the model or the preprocessor was not loaded at startup.
    """
    components_ready = model is not None and preprocessor is not None
    if components_ready:
        return predict_score(
            question_code,
            student_answer,
            model,
            preprocessor,
            jawaban_essay,
            device,
        )
    return ("Error: Model not loaded", "Model tidak dapat dimuat", "", "")
|
121 |
-
|
122 |
-
# Check if components are loaded before creating interface
|
123 |
-
# Build the (dropdown label, question code) pairs shown in the interface.
if jawaban_essay is None:
    # Nothing was loaded: offer a single placeholder entry.
    question_info = [("Demo - Model tidak dapat dimuat", 1)]
else:
    # Label is the code plus the first 50 characters of the question text.
    question_info = [
        (f"{entry['Kode']} - {entry['Pertanyaan'][:50]}...", entry['Kode'])
        for _index, entry in jawaban_essay.iterrows()
    ]
|
130 |
-
|
131 |
-
# Create the Gradio app with enhanced display
|
132 |
-
# Build the Gradio interface: question picker and answer box on the left,
# the four prediction outputs on the right, optional examples underneath.
with gr.Blocks(title="Auto-Scoring Essay Indonesia", theme=gr.themes.Soft()) as app:
    gr.Markdown("# π Sistem Auto-Scoring Essay Bahasa Indonesia")
    gr.Markdown("Pilih pertanyaan dan masukkan jawaban siswa untuk mendapatkan prediksi nilai otomatis menggunakan AI.")

    if model is None:
        gr.Markdown("β οΈ **Model sedang dimuat atau tidak tersedia. Silakan coba lagi nanti.**")

    # Label and code of the question that is preselected in the dropdown.
    default_label, default_code = question_info[0] if question_info else (None, 1)

    def update_code(question_choice):
        """Translate a dropdown label into its question code.

        Returns the code of the first question (or 1 when there are no
        questions) if the label is not recognised.
        """
        for label, code in question_info:
            if label == question_choice:
                return code
        return default_code

    def predict_for_choice(question_choice, student_answer):
        """Score an answer given the dropdown label instead of the code."""
        return gradio_predict(update_code(question_choice), student_answer)

    with gr.Row():
        with gr.Column():
            question_input = gr.Dropdown(
                label="π Pilih Pertanyaan",
                choices=[label for label, _code in question_info],
                value=default_label,
            )
            # Hidden field that carries the actual question code. It is
            # otherwise only written by the dropdown change handler below, so
            # it is seeded with the preselected question's code; without the
            # seed, a submit made before any selection change sends "".
            actual_code = gr.Textbox(visible=False, value=str(default_code))
            answer_input = gr.Textbox(
                label="βοΈ Jawaban Siswa",
                placeholder="Masukkan jawaban siswa di sini...",
                lines=8,
                max_lines=15,
            )
            submit_btn = gr.Button("π Prediksi Nilai", variant="primary", size="lg")

        with gr.Column():
            output_score = gr.Textbox(label="π Hasil Prediksi", interactive=False)
            output_question = gr.Textbox(label="β Detail Pertanyaan", interactive=False)
            output_key = gr.Textbox(label="π Kunci Jawaban", interactive=False)
            output_similarity = gr.Textbox(label="π Similarity Score", interactive=False)

    # Keep the hidden code in sync with the selected question.
    question_input.change(update_code, inputs=question_input, outputs=actual_code)

    submit_btn.click(
        fn=gradio_predict,
        inputs=[actual_code, answer_input],
        outputs=[output_score, output_question, output_key, output_similarity],
    )

    # Add some examples if the answer-key table is loaded
    if jawaban_essay is not None and question_info:
        gr.Markdown("## π‘ Contoh Jawaban")
        examples = [
            [question_info[0][0], "Ancaman keamanan jaringan meliputi malware seperti virus dan trojan, serangan DDoS yang dapat melumpuhkan server, serta upaya phishing untuk mencuri data pribadi."],
            [question_info[0][0], "Kebocoran data, penyusupan akun, penghapusan data, spam dan phising merupakan ancaman utama dalam keamanan jaringan komputer."],
        ]

        if len(question_info) > 1:
            examples.append([
                question_info[1][0],
                "Jaringan komputer adalah kumpulan komputer yang terhubung untuk berbagi sumber daya seperti file, printer, dan koneksi internet.",
            ])

        gr.Examples(
            examples=examples,
            inputs=[question_input, answer_input],
            outputs=[output_score, output_question, output_key, output_similarity],
            # The example inputs hold the dropdown label, not the code, so the
            # callback has to resolve the label before scoring.
            fn=predict_for_choice,
            cache_examples=False,
        )

    gr.Markdown("---")
    gr.Markdown("*Sistem ini menggunakan AI untuk memberikan penilaian otomatis pada essay berbahasa Indonesia.*")
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
import joblib
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
from scipy.spatial.distance import cosine
|
7 |
+
from pathlib import Path
|
8 |
+
import os
|
9 |
+
|
10 |
+
# Define the model architecture (same as in training)
|
11 |
+
class AESModel(torch.nn.Module):
    """Feed-forward regressor that maps a feature vector to an essay score.

    The network is a stack of ``Linear -> BatchNorm1d -> ReLU -> Dropout``
    blocks followed by a one-unit ``Linear -> Sigmoid`` head. The head output
    is multiplied by 4, so predictions lie between 0 and 4.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of every hidden block.
        num_layers: Number of hidden blocks, counting the input block
            (``num_layers - 1`` further blocks follow it).
        dropout: Dropout probability used in every block.
    """

    def __init__(self, input_dim, hidden_dim=512, num_layers=3, dropout=0.4):
        super(AESModel, self).__init__()
        nn = torch.nn

        def dense_block(in_features):
            # One Linear/BatchNorm/ReLU/Dropout unit; only the input width varies.
            return nn.Sequential(
                nn.Linear(in_features, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
            )

        # Attribute names and creation order are kept as-is so the saved
        # weights loaded elsewhere in this file still match the parameters.
        self.input_layer = dense_block(input_dim)
        self.hidden_layers = nn.ModuleList(
            [dense_block(hidden_dim) for _ in range(num_layers - 1)]
        )
        self.output_layer = nn.Sequential(
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run ``x`` through all blocks and return the sigmoid output times 4."""
        hidden = self.input_layer(x)
        for block in self.hidden_layers:
            hidden = block(hidden)
        return self.output_layer(hidden) * 4
|
38 |
+
|
39 |
+
# Load the saved model and components
|
40 |
+
def load_components(model_dir='./final'):
    """Load the trained model and its supporting artifacts from ``model_dir``.

    Reads ``final_aes_model.pt``, ``preprocessor.pkl``, ``jawaban_essay.pkl``
    and ``training_config.pkl``, rebuilds ``AESModel`` from the stored
    architecture settings, loads the saved weights and puts the model in
    evaluation mode on the CPU.

    Args:
        model_dir: Directory that contains the four artifact files.

    Returns:
        ``(model, preprocessor, jawaban_essay, device)`` on success, or
        ``(None, None, None, None)`` when anything fails. The failure is
        printed instead of raised so the caller can keep running without a
        model.
    """
    try:
        artifact_dir = Path(model_dir)
        artifact_names = (
            'final_aes_model.pt',
            'preprocessor.pkl',
            'jawaban_essay.pkl',
            'training_config.pkl',
        )
        # Report every missing artifact at once rather than one per restart.
        missing = [name for name in artifact_names if not (artifact_dir / name).is_file()]
        if missing:
            raise FileNotFoundError(
                f"missing files in {artifact_dir}: {', '.join(missing)}"
            )

        # Use CPU for Hugging Face Spaces deployment
        device = torch.device('cpu')

        # SECURITY: every loader below unpickles arbitrary Python objects
        # (torch.load with weights_only=False, joblib.load, pd.read_pickle).
        # Only point model_dir at artifacts produced by a trusted training run.
        model_data = torch.load(
            artifact_dir / 'final_aes_model.pt',
            map_location=device,
            weights_only=False,
        )
        preprocessor = joblib.load(artifact_dir / 'preprocessor.pkl')
        jawaban_essay = pd.read_pickle(artifact_dir / 'jawaban_essay.pkl')
        training_config = joblib.load(artifact_dir / 'training_config.pkl')

        architecture = training_config['model_architecture']
        model = AESModel(
            input_dim=architecture['input_dim'],
            hidden_dim=architecture['hidden_dim'],
            num_layers=architecture['num_layers'],
            dropout=architecture['dropout'],
        )
        model.load_state_dict(model_data['model_state_dict'])
        model.to(device)
        model.eval()

        return model, preprocessor, jawaban_essay, device
    except Exception as e:
        # Deliberate best-effort: the caller checks for None and degrades.
        print(f"Error loading model components: {e}")
        return None, None, None, None
|
64 |
+
|
65 |
+
# Prediction function that returns multiple outputs
|
66 |
+
def predict_score(question_code, student_answer, model, preprocessor, jawaban_essay, device):
    """Score one student answer against the answer key of a question.

    Args:
        question_code: Question identifier; anything ``int()`` accepts.
        student_answer: Raw answer text typed by the student.
        model: Callable model that takes a ``(1, n_features)`` float tensor.
        preprocessor: Object providing ``preprocess_text``,
            ``extract_linguistic_features`` and ``get_sbert_embedding``.
        jawaban_essay: DataFrame with the columns ``Kode``, ``Pertanyaan``,
            ``Jawaban`` and ``processed_kunci_jawaban``.
        device: Torch device the input tensor is moved to.

    Returns:
        A 4-tuple of display strings ``(score, question, answer key,
        similarity)``. On failure the first element is an ``"Error: ..."``
        message and the other three are empty strings.
    """
    no_details = ("", "", "")

    # Validate the cheap inputs up front so the user sees a clear message
    # instead of raw exception text such as "invalid literal for int()".
    try:
        question_code = int(question_code)
    except (TypeError, ValueError):
        return ("Error: Invalid question code",) + no_details
    if student_answer is None or not str(student_answer).strip():
        return ("Error: Student answer is empty",) + no_details

    try:
        # Get the answer key for this question
        kunci_jawaban = jawaban_essay[jawaban_essay['Kode'] == question_code]
        if kunci_jawaban.empty:
            return ("Error: Question code not found",) + no_details

        question_text = kunci_jawaban['Pertanyaan'].iloc[0]
        key_answer = kunci_jawaban['Jawaban'].iloc[0]
        processed_key = kunci_jawaban['processed_kunci_jawaban'].iloc[0]

        # Extract features
        processed_text = preprocessor.preprocess_text(student_answer)
        linguistic_features = preprocessor.extract_linguistic_features(processed_text)
        embedding = preprocessor.get_sbert_embedding(processed_text)
        embedding_kunci = preprocessor.get_sbert_embedding(processed_key)
        similarity = 1 - cosine(embedding, embedding_kunci)

        # Feature order is embedding values, then linguistic features in
        # their dict order, then the similarity.
        # NOTE(review): presumably this must match the training-time column
        # order -- confirm against the training code.
        ordered_values = [*embedding, *linguistic_features.values(), similarity]
        feature_values = np.array(ordered_values).astype(np.float32).reshape(1, -1)

        # Make prediction
        with torch.no_grad():
            input_tensor = torch.from_numpy(feature_values).to(device)
            prediction = model(input_tensor).item()

        # Clamp to the 0-4 score scale before rounding for display.
        prediction = round(min(max(prediction, 0), 4), 2)

        return (f"Predicted Score: {prediction:.2f}/4.00",
                f"Pertanyaan: {question_text}",
                f"Kunci Jawaban: {key_answer}",
                f"Similarity dengan Kunci Jawaban: {similarity:.3f}")
    except Exception as e:
        # Boundary handler: the UI shows the message rather than crashing.
        return (f"Error: {str(e)}",) + no_details
|
106 |
+
|
107 |
+
# Load components once at startup
|
108 |
+
# Load everything once at import time so each request reuses the same objects.
print("Loading model components...")
model, preprocessor, jawaban_essay, device = load_components()

# load_components signals failure by returning None for every component.
if model is None:
    print("Failed to load model components. Running in demo mode.")
else:
    print("Model components loaded successfully!")
|
115 |
+
|
116 |
+
# Create Gradio interface with multiple outputs
|
117 |
+
def gradio_predict(question_code, student_answer):
    """Gradio callback that scores an answer with the module-level components.

    Returns the 4-tuple produced by ``predict_score``, or a fixed error tuple
    when the model or the preprocessor was not loaded at startup.
    """
    components_ready = model is not None and preprocessor is not None
    if components_ready:
        return predict_score(
            question_code,
            student_answer,
            model,
            preprocessor,
            jawaban_essay,
            device,
        )
    return ("Error: Model not loaded", "Model tidak dapat dimuat", "", "")
|
121 |
+
|
122 |
+
# Check if components are loaded before creating interface
|
123 |
+
# Build the (dropdown label, question code) pairs shown in the interface.
if jawaban_essay is None:
    # Nothing was loaded: offer a single placeholder entry.
    question_info = [("Demo - Model tidak dapat dimuat", 1)]
else:
    # Label is the code plus the first 50 characters of the question text.
    question_info = [
        (f"{entry['Kode']} - {entry['Pertanyaan'][:50]}...", entry['Kode'])
        for _index, entry in jawaban_essay.iterrows()
    ]
|
130 |
+
|
131 |
+
# Create the Gradio app with enhanced display
|
132 |
+
# Build the Gradio interface: question picker and answer box on the left,
# the four prediction outputs on the right, optional examples underneath.
with gr.Blocks(title="Auto-Scoring Essay Indonesia", theme=gr.themes.Soft()) as app:
    gr.Markdown("# π Sistem Auto-Scoring Essay Bahasa Indonesia")
    gr.Markdown("Pilih pertanyaan dan masukkan jawaban siswa untuk mendapatkan prediksi nilai otomatis menggunakan AI.")

    if model is None:
        gr.Markdown("β οΈ **Model sedang dimuat atau tidak tersedia. Silakan coba lagi nanti.**")

    # Label and code of the question that is preselected in the dropdown.
    default_label, default_code = question_info[0] if question_info else (None, 1)

    def update_code(question_choice):
        """Translate a dropdown label into its question code.

        Returns the code of the first question (or 1 when there are no
        questions) if the label is not recognised.
        """
        for label, code in question_info:
            if label == question_choice:
                return code
        return default_code

    def predict_for_choice(question_choice, student_answer):
        """Score an answer given the dropdown label instead of the code."""
        return gradio_predict(update_code(question_choice), student_answer)

    with gr.Row():
        with gr.Column():
            question_input = gr.Dropdown(
                label="π Pilih Pertanyaan",
                choices=[label for label, _code in question_info],
                value=default_label,
            )
            # Hidden field that carries the actual question code. It is
            # otherwise only written by the dropdown change handler below, so
            # it is seeded with the preselected question's code; without the
            # seed, a submit made before any selection change sends "".
            actual_code = gr.Textbox(visible=False, value=str(default_code))
            answer_input = gr.Textbox(
                label="βοΈ Jawaban Siswa",
                placeholder="Masukkan jawaban siswa di sini...",
                lines=8,
                max_lines=15,
            )
            submit_btn = gr.Button("π Prediksi Nilai", variant="primary", size="lg")

        with gr.Column():
            output_score = gr.Textbox(label="π Hasil Prediksi", interactive=False)
            output_question = gr.Textbox(label="β Detail Pertanyaan", interactive=False)
            output_key = gr.Textbox(label="π Kunci Jawaban", interactive=False)
            output_similarity = gr.Textbox(label="π Similarity Score", interactive=False)

    # Keep the hidden code in sync with the selected question.
    question_input.change(update_code, inputs=question_input, outputs=actual_code)

    submit_btn.click(
        fn=gradio_predict,
        inputs=[actual_code, answer_input],
        outputs=[output_score, output_question, output_key, output_similarity],
    )

    # Add some examples if the answer-key table is loaded
    if jawaban_essay is not None and question_info:
        gr.Markdown("## π‘ Contoh Jawaban")
        examples = [
            [question_info[0][0], "Ancaman keamanan jaringan meliputi malware seperti virus dan trojan, serangan DDoS yang dapat melumpuhkan server, serta upaya phishing untuk mencuri data pribadi."],
            [question_info[0][0], "Kebocoran data, penyusupan akun, penghapusan data, spam dan phising merupakan ancaman utama dalam keamanan jaringan komputer."],
        ]

        if len(question_info) > 1:
            examples.append([
                question_info[1][0],
                "Jaringan komputer adalah kumpulan komputer yang terhubung untuk berbagi sumber daya seperti file, printer, dan koneksi internet.",
            ])

        gr.Examples(
            examples=examples,
            inputs=[question_input, answer_input],
            outputs=[output_score, output_question, output_key, output_similarity],
            # The example inputs hold the dropdown label, not the code, so the
            # callback has to resolve the label before scoring.
            fn=predict_for_choice,
            cache_examples=False,
        )

    gr.Markdown("---")
    gr.Markdown("*Sistem ini menggunakan AI untuk memberikan penilaian otomatis pada essay berbahasa Indonesia.*")
|
200 |
+
|
201 |
+
|
202 |
+
# Launch the app
|
203 |
+
if __name__ == "__main__":
    # Print the installed PyTorch version before starting the server.
    print(torch.__version__)
    # Start the Gradio server for the Blocks app defined in this file.
    app.launch()
|