hen8001 committed
Commit 7f93fc7
1 Parent(s): 045668d

test part updated

__pycache__/pan22_verif_evaluator.cpython-313.pyc CHANGED
Binary files a/__pycache__/pan22_verif_evaluator.cpython-313.pyc and b/__pycache__/pan22_verif_evaluator.cpython-313.pyc differ
 
app.py CHANGED
@@ -141,7 +141,7 @@ def train_model(pairs_file, truths_file, vocab_size, ngram_size, num_iterations,
 # Gradio interface
 def gradio_interface(pairs_file, truths_file, vocab_size, ngram_size, num_iterations, dropout):
     if pairs_file is None or truths_file is None:
-        return "Please upload both JSON files."
+        return "Please upload both JSON files.", None, gr.Group(visible=False), None, None

     try:
         start_time = time.time()
@@ -167,37 +167,78 @@ def gradio_interface(pairs_file, truths_file, vocab_size, ngram_size, num_iterat
         }
         df = pd.DataFrame(data)

-        return training_message, df, gr.Group(visible=True), pickle_path
+        return training_message, df, gr.Group(visible=True), pickle_path, pickle_path
     except Exception as e:
-        return f"An error occurred: {str(e)}", gr.DataFrame(visible=False), gr.Group(visible=False), None
+        return f"An error occurred: {str(e)}", None, gr.Group(visible=False), None, None

 with gr.Blocks() as iface:
     gr.Markdown("# Character 4-grams Model")
-    gr.Markdown("Upload pairs.json and truths.json files, adjust parameters, then click 'Train' to train and evaluate the model.")

-    with gr.Row():
-        pairs_file = gr.File(label="Upload pairs.json")
-        truths_file = gr.File(label="Upload truths.json")
+    model_path = gr.State(None)
+
+    with gr.Tab("Train"):
+        gr.Markdown("Upload pairs.json and truths.json files, adjust parameters, then click 'Train' to train and evaluate the model.")
+        with gr.Row():
+            pairs_file = gr.File(label="Upload pairs.json")
+            truths_file = gr.File(label="Upload truths.json")
+
+        with gr.Row():
+            vocab_size = gr.Slider(minimum=1000, maximum=50000, step=100, value=3000, label="Vocabulary Size")
+            ngram_size = gr.Slider(minimum=2, maximum=6, step=1, value=4, label="N-gram Size")
+            num_iterations = gr.Slider(minimum=0, maximum=100, step=1, value=0, label="Number of Iterations")
+            dropout = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, value=0.5, label="Dropout")
+
+        submit_btn = gr.Button("Train")
+
+        status_box = gr.Textbox(label="Status")
+
+        with gr.Group(visible=False) as output_group:
+            gr.Markdown("## Evaluation Metrics")
+            output_table = gr.DataFrame()
+            download_button = gr.File(label="Download Model")

-    with gr.Row():
-        vocab_size = gr.Slider(minimum=1000, maximum=50000, step=100, value=3000, label="Vocabulary Size")
-        ngram_size = gr.Slider(minimum=2, maximum=6, step=1, value=4, label="N-gram Size")
-        num_iterations = gr.Slider(minimum=0, maximum=100, step=1, value=0, label="Number of Iterations")
-        dropout = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, value=0.5, label="Dropout")
-
-    submit_btn = gr.Button("Train")
+    with gr.Tab('Test'):
+        gr.Markdown("Enter two texts to compare and click 'Predict' to estimate their similarity.")
+        text1 = gr.Textbox(label="Text 1")
+        text2 = gr.Textbox(label="Text 2")
+        predict_btn = gr.Button("Predict")
+        similarity_output = gr.Textbox(label="Similarity Result")
+
+        def test_model(text1, text2, model_path):
+            if model_path is None:
+                return "Please train the model first."
+
+            model = pickle.load(open(model_path, 'rb'))
+            vectorizer = model['vectorizer']
+            opt_p1 = model['opt_p1']
+            opt_p2 = model['opt_p2']
+            num_iterations = model['rnd_feature_idxs'] is not None
+            rnd_feature_idxs = model['rnd_feature_idxs']
+
+            x1, x2 = vectorizer.transform([text1, text2]).toarray()
+            if num_iterations:
+                similarities_ = []
+                for i in range(len(rnd_feature_idxs)):
+                    similarities_.append(cosine_sim(x1[rnd_feature_idxs[i, :]], x2[rnd_feature_idxs[i, :]]))
+                similarity = np.mean(similarities_)
+            else:
+                similarity = cosine_sim(x1, x2)
+
+            similarity = np.array(list(correct_scores([similarity], p1=opt_p1, p2=opt_p2)))[0]
+            return f"Similarity: {similarity:.4f}"

-    status_box = gr.Textbox(label="Status")

-    with gr.Group(visible=False) as output_group:
-        gr.Markdown("## Evaluation Metrics")
-        output_table = gr.DataFrame()
-        download_button = gr.File(label="Download Model")

     submit_btn.click(
         gradio_interface,
         inputs=[pairs_file, truths_file, vocab_size, ngram_size, num_iterations, dropout],
-        outputs=[status_box, output_table, output_group, download_button]
+        outputs=[status_box, output_table, output_group, download_button, model_path]
+    )
+
+    predict_btn.click(
+        test_model,
+        inputs=[text1, text2, model_path],
+        outputs=[similarity_output]
     )

 if __name__ == "__main__":
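
Note on the change above: the commit threads the trained model's pickle path through a gr.State value so the new Test tab can reuse it in its own click handler. The following is a minimal, self-contained sketch of that state-passing pattern only; fake_train and fake_predict are hypothetical stand-ins for the app's gradio_interface and test_model, not the app's actual code.

# Sketch of the gr.State pattern used in this commit (placeholder callbacks).
import gradio as gr

def fake_train():
    # Hypothetical stand-in: the real training callback returns the
    # pickle path of the trained model as its final output value.
    return "Trained.", "model.pkl"

def fake_predict(text1, text2, path):
    if path is None:
        return "Please train the model first."
    return f"Would load {path} and compare the two texts."

with gr.Blocks() as demo:
    model_path = gr.State(None)  # shared across tabs, one value per session
    with gr.Tab("Train"):
        train_btn = gr.Button("Train")
        status = gr.Textbox(label="Status")
    with gr.Tab("Test"):
        t1 = gr.Textbox(label="Text 1")
        t2 = gr.Textbox(label="Text 2")
        predict_btn = gr.Button("Predict")
        out = gr.Textbox(label="Similarity Result")

    # Writing to the State output makes the path visible to the Test tab.
    train_btn.click(fake_train, inputs=[], outputs=[status, model_path])
    predict_btn.click(fake_predict, inputs=[t1, t2, model_path], outputs=[out])

if __name__ == "__main__":
    demo.launch()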
model.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e3d860d4442980ec7e79ee1c184a5e5855cd416d7abbc1fa5a8fefb188edba3
+size 133165
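
model.pkl is stored via Git LFS, so the three lines above are only the pointer file. A quick sanity check after fetching the real artifact could look like the snippet below; the key names are the ones the new test_model callback reads, and anything beyond that is an assumption.

# Sanity-check the pulled LFS artifact (run `git lfs pull` first,
# otherwise model.pkl is only the pointer text shown above).
import pickle

with open("model.pkl", "rb") as f:
    model = pickle.load(f)

# Keys the new test_model callback expects; the dict may contain more.
for key in ("vectorizer", "opt_p1", "opt_p2", "rnd_feature_idxs"):
    print(key, "present" if key in model else "MISSING")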