Spaces:
Sleeping
Sleeping
test part updated
Browse files- __pycache__/pan22_verif_evaluator.cpython-313.pyc +0 -0
- app.py +61 -20
- model.pkl +3 -0
__pycache__/pan22_verif_evaluator.cpython-313.pyc
CHANGED
Binary files a/__pycache__/pan22_verif_evaluator.cpython-313.pyc and b/__pycache__/pan22_verif_evaluator.cpython-313.pyc differ
|
|
app.py
CHANGED
@@ -141,7 +141,7 @@ def train_model(pairs_file, truths_file, vocab_size, ngram_size, num_iterations,
|
|
141 |
# Gradio interface
|
142 |
def gradio_interface(pairs_file, truths_file, vocab_size, ngram_size, num_iterations, dropout):
|
143 |
if pairs_file is None or truths_file is None:
|
144 |
-
return "Please upload both JSON files."
|
145 |
|
146 |
try:
|
147 |
start_time = time.time()
|
@@ -167,37 +167,78 @@ def gradio_interface(pairs_file, truths_file, vocab_size, ngram_size, num_iterat
|
|
167 |
}
|
168 |
df = pd.DataFrame(data)
|
169 |
|
170 |
-
return training_message, df, gr.Group(visible=True), pickle_path
|
171 |
except Exception as e:
|
172 |
-
return f"An error occurred: {str(e)}",
|
173 |
|
174 |
with gr.Blocks() as iface:
|
175 |
gr.Markdown("# Character 4-grams Model")
|
176 |
-
gr.Markdown("Upload pairs.json and truths.json files, adjust parameters, then click 'Train' to train and evaluate the model.")
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
-
with gr.
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
|
190 |
-
status_box = gr.Textbox(label="Status")
|
191 |
|
192 |
-
with gr.Group(visible=False) as output_group:
|
193 |
-
gr.Markdown("## Evaluation Metrics")
|
194 |
-
output_table = gr.DataFrame()
|
195 |
-
download_button = gr.File(label="Download Model")
|
196 |
|
197 |
submit_btn.click(
|
198 |
gradio_interface,
|
199 |
inputs=[pairs_file, truths_file, vocab_size, ngram_size, num_iterations, dropout],
|
200 |
-
outputs=[status_box, output_table, output_group, download_button]
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
)
|
202 |
|
203 |
if __name__ == "__main__":
|
|
|
141 |
# Gradio interface
|
142 |
def gradio_interface(pairs_file, truths_file, vocab_size, ngram_size, num_iterations, dropout):
|
143 |
if pairs_file is None or truths_file is None:
|
144 |
+
return "Please upload both JSON files.", None, gr.Group(visible=False), None, None
|
145 |
|
146 |
try:
|
147 |
start_time = time.time()
|
|
|
167 |
}
|
168 |
df = pd.DataFrame(data)
|
169 |
|
170 |
+
return training_message, df, gr.Group(visible=True), pickle_path, pickle_path
|
171 |
except Exception as e:
|
172 |
+
return f"An error occurred: {str(e)}", None, gr.Group(visible=False), None, None
|
173 |
|
174 |
with gr.Blocks() as iface:
|
175 |
gr.Markdown("# Character 4-grams Model")
|
|
|
176 |
|
177 |
+
model_path = gr.State(None)
|
178 |
+
|
179 |
+
with gr.Tab("Train"):
|
180 |
+
gr.Markdown("Upload pairs.json and truths.json files, adjust parameters, then click 'Train' to train and evaluate the model.")
|
181 |
+
with gr.Row():
|
182 |
+
pairs_file = gr.File(label="Upload pairs.json")
|
183 |
+
truths_file = gr.File(label="Upload truths.json")
|
184 |
+
|
185 |
+
with gr.Row():
|
186 |
+
vocab_size = gr.Slider(minimum=1000, maximum=50000, step=100, value=3000, label="Vocabulary Size")
|
187 |
+
ngram_size = gr.Slider(minimum=2, maximum=6, step=1, value=4, label="N-gram Size")
|
188 |
+
num_iterations = gr.Slider(minimum=0, maximum=100, step=1, value=0, label="Number of Iterations")
|
189 |
+
dropout = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, value=0.5, label="Dropout")
|
190 |
+
|
191 |
+
submit_btn = gr.Button("Train")
|
192 |
+
|
193 |
+
status_box = gr.Textbox(label="Status")
|
194 |
+
|
195 |
+
with gr.Group(visible=False) as output_group:
|
196 |
+
gr.Markdown("## Evaluation Metrics")
|
197 |
+
output_table = gr.DataFrame()
|
198 |
+
download_button = gr.File(label="Download Model")
|
199 |
|
200 |
+
with gr.Tab('Test'):
|
201 |
+
gr.Markdown("Enter two texts to compare and click 'Predict' to estimate their similarity.")
|
202 |
+
text1 = gr.Textbox(label="Text 1")
|
203 |
+
text2 = gr.Textbox(label="Text 2")
|
204 |
+
predict_btn = gr.Button("Predict")
|
205 |
+
similarity_output = gr.Textbox(label="Similarity Result")
|
206 |
+
|
207 |
+
def test_model(text1, text2, model_path):
    """Score how similar two texts are using a previously trained model.

    Args:
        text1: First text to compare.
        text2: Second text to compare.
        model_path: Path to the pickled model bundle, or ``None`` when no
            model has been trained yet.

    Returns:
        A status string: either a prompt to train the model first, or
        ``"Similarity: <score>"`` with the score formatted to four decimals.
    """
    if model_path is None:
        return "Please train the model first."

    # NOTE(security): unpickling executes arbitrary code. This is only
    # acceptable while model_path refers to a file written by this app
    # itself; never point it at user-uploaded data.
    # The context manager closes the file handle, which the bare
    # ``pickle.load(open(...))`` form left to the garbage collector.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    vectorizer = model['vectorizer']
    opt_p1 = model['opt_p1']
    opt_p2 = model['opt_p2']
    # ``None`` means the model was trained without random feature subsets.
    rnd_feature_idxs = model['rnd_feature_idxs']

    x1, x2 = vectorizer.transform([text1, text2]).toarray()
    if rnd_feature_idxs is not None:
        # Average the cosine similarity over every stored feature subset;
        # each row of rnd_feature_idxs selects one subset of feature columns.
        similarity = np.mean(
            [cosine_sim(x1[idxs], x2[idxs]) for idxs in rnd_feature_idxs]
        )
    else:
        similarity = cosine_sim(x1, x2)

    # correct_scores is handed a one-element list, so take element 0 of the
    # result. Presumably it rescales the raw score using the p1/p2 values
    # stored with the model -- confirm against its definition.
    similarity = np.array(
        list(correct_scores([similarity], p1=opt_p1, p2=opt_p2))
    )[0]
    return f"Similarity: {similarity:.4f}"
|
229 |
|
|
|
230 |
|
|
|
|
|
|
|
|
|
231 |
|
232 |
submit_btn.click(
|
233 |
gradio_interface,
|
234 |
inputs=[pairs_file, truths_file, vocab_size, ngram_size, num_iterations, dropout],
|
235 |
+
outputs=[status_box, output_table, output_group, download_button, model_path]
|
236 |
+
)
|
237 |
+
|
238 |
+
predict_btn.click(
|
239 |
+
test_model,
|
240 |
+
inputs=[text1, text2, model_path],
|
241 |
+
outputs=[similarity_output]
|
242 |
)
|
243 |
|
244 |
if __name__ == "__main__":
|
model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e3d860d4442980ec7e79ee1c184a5e5855cd416d7abbc1fa5a8fefb188edba3
|
3 |
+
size 133165
|