hen8001 committed
Commit 7f93fc7
1 Parent(s): 045668d

test part updated

__pycache__/pan22_verif_evaluator.cpython-313.pyc CHANGED
Binary files a/__pycache__/pan22_verif_evaluator.cpython-313.pyc and b/__pycache__/pan22_verif_evaluator.cpython-313.pyc differ
 
app.py CHANGED
@@ -141,7 +141,7 @@ def train_model(pairs_file, truths_file, vocab_size, ngram_size, num_iterations,
 # Gradio interface
 def gradio_interface(pairs_file, truths_file, vocab_size, ngram_size, num_iterations, dropout):
     if pairs_file is None or truths_file is None:
-        return "Please upload both JSON files."
+        return "Please upload both JSON files.", None, gr.Group(visible=False), None, None

     try:
         start_time = time.time()
@@ -167,37 +167,78 @@ def gradio_interface(pairs_file, truths_file, vocab_size, ngram_size, num_iterat
         }
         df = pd.DataFrame(data)

-        return training_message, df, gr.Group(visible=True), pickle_path
+        return training_message, df, gr.Group(visible=True), pickle_path, pickle_path
     except Exception as e:
-        return f"An error occurred: {str(e)}", gr.DataFrame(visible=False), gr.Group(visible=False), None
+        return f"An error occurred: {str(e)}", None, gr.Group(visible=False), None, None

 with gr.Blocks() as iface:
     gr.Markdown("# Character 4-grams Model")
-    gr.Markdown("Upload pairs.json and truths.json files, adjust parameters, then click 'Train' to train and evaluate the model.")

-    with gr.Row():
-        pairs_file = gr.File(label="Upload pairs.json")
-        truths_file = gr.File(label="Upload truths.json")
+    model_path = gr.State(None)
+
+    with gr.Tab("Train"):
+        gr.Markdown("Upload pairs.json and truths.json files, adjust parameters, then click 'Train' to train and evaluate the model.")
+        with gr.Row():
+            pairs_file = gr.File(label="Upload pairs.json")
+            truths_file = gr.File(label="Upload truths.json")
+
+        with gr.Row():
+            vocab_size = gr.Slider(minimum=1000, maximum=50000, step=100, value=3000, label="Vocabulary Size")
+            ngram_size = gr.Slider(minimum=2, maximum=6, step=1, value=4, label="N-gram Size")
+            num_iterations = gr.Slider(minimum=0, maximum=100, step=1, value=0, label="Number of Iterations")
+            dropout = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, value=0.5, label="Dropout")
+
+        submit_btn = gr.Button("Train")
+
+        status_box = gr.Textbox(label="Status")
+
+        with gr.Group(visible=False) as output_group:
+            gr.Markdown("## Evaluation Metrics")
+            output_table = gr.DataFrame()
+            download_button = gr.File(label="Download Model")

-    with gr.Row():
-        vocab_size = gr.Slider(minimum=1000, maximum=50000, step=100, value=3000, label="Vocabulary Size")
-        ngram_size = gr.Slider(minimum=2, maximum=6, step=1, value=4, label="N-gram Size")
-        num_iterations = gr.Slider(minimum=0, maximum=100, step=1, value=0, label="Number of Iterations")
-        dropout = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, value=0.5, label="Dropout")
-
-    submit_btn = gr.Button("Train")
+    with gr.Tab('Test'):
+        gr.Markdown("Enter two texts to compare and click 'Predict' to estimate their similarity.")
+        text1 = gr.Textbox(label="Text 1")
+        text2 = gr.Textbox(label="Text 2")
+        predict_btn = gr.Button("Predict")
+        similarity_output = gr.Textbox(label="Similarity Result")
+
+        def test_model(text1, text2, model_path):
+            if model_path is None:
+                return "Please train the model first."
+
+            model = pickle.load(open(model_path, 'rb'))
+            vectorizer = model['vectorizer']
+            opt_p1 = model['opt_p1']
+            opt_p2 = model['opt_p2']
+            num_iterations = model['rnd_feature_idxs'] is not None
+            rnd_feature_idxs = model['rnd_feature_idxs']
+
+            x1, x2 = vectorizer.transform([text1, text2]).toarray()
+            if num_iterations:
+                similarities_ = []
+                for i in range(len(rnd_feature_idxs)):
+                    similarities_.append(cosine_sim(x1[rnd_feature_idxs[i, :]], x2[rnd_feature_idxs[i, :]]))
+                similarity = np.mean(similarities_)
+            else:
+                similarity = cosine_sim(x1, x2)
+
+            similarity = np.array(list(correct_scores([similarity], p1=opt_p1, p2=opt_p2)))[0]
+            return f"Similarity: {similarity:.4f}"

-    status_box = gr.Textbox(label="Status")

-    with gr.Group(visible=False) as output_group:
-        gr.Markdown("## Evaluation Metrics")
-        output_table = gr.DataFrame()
-        download_button = gr.File(label="Download Model")

     submit_btn.click(
         gradio_interface,
         inputs=[pairs_file, truths_file, vocab_size, ngram_size, num_iterations, dropout],
-        outputs=[status_box, output_table, output_group, download_button]
+        outputs=[status_box, output_table, output_group, download_button, model_path]
+    )
+
+    predict_btn.click(
+        test_model,
+        inputs=[text1, text2, model_path],
+        outputs=[similarity_output]
     )

 if __name__ == "__main__":
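
Note on the change above: the commit threads the trained model's pickle path through a gr.State value so the new Test tab can reuse it in its own click handler. The following is a minimal, self-contained sketch of that state-passing pattern only; fake_train and fake_predict are hypothetical stand-ins for the app's gradio_interface and test_model, not the app's actual code.

# Sketch of the gr.State pattern used in this commit (placeholder callbacks).
import gradio as gr

def fake_train():
    # Hypothetical stand-in: the real training callback returns the
    # pickle path of the trained model as its final output value.
    return "Trained.", "model.pkl"

def fake_predict(text1, text2, path):
    if path is None:
        return "Please train the model first."
    return f"Would load {path} and compare the two texts."

with gr.Blocks() as demo:
    model_path = gr.State(None)  # shared across tabs, one value per session
    with gr.Tab("Train"):
        train_btn = gr.Button("Train")
        status = gr.Textbox(label="Status")
    with gr.Tab("Test"):
        t1 = gr.Textbox(label="Text 1")
        t2 = gr.Textbox(label="Text 2")
        predict_btn = gr.Button("Predict")
        out = gr.Textbox(label="Similarity Result")

    # Writing to the State output makes the path visible to the Test tab.
    train_btn.click(fake_train, inputs=[], outputs=[status, model_path])
    predict_btn.click(fake_predict, inputs=[t1, t2, model_path], outputs=[out])

if __name__ == "__main__":
    demo.launch()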
model.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e3d860d4442980ec7e79ee1c184a5e5855cd416d7abbc1fa5a8fefb188edba3
+size 133165
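
model.pkl is stored via Git LFS, so the three lines above are only the pointer file. A quick sanity check after fetching the real artifact could look like the snippet below; the key names are the ones the new test_model callback reads, and anything beyond that is an assumption.

# Sanity-check the pulled LFS artifact (run `git lfs pull` first,
# otherwise model.pkl is only the pointer text shown above).
import pickle

with open("model.pkl", "rb") as f:
    model = pickle.load(f)

# Keys the new test_model callback expects; the dict may contain more.
for key in ("vectorizer", "opt_p1", "opt_p2", "rnd_feature_idxs"):
    print(key, "present" if key in model else "MISSING")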