Sourudra committed on
Commit
e4a2281
·
verified ·
1 Parent(s): 96b2151

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -8
app.py CHANGED
@@ -39,6 +39,19 @@ def calculate_similarity(doc1, doc2):
39
  similarity_score = minhash1.jaccard(minhash2)
40
  return similarity_score
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # Function to handle the similarity calculation
43
  def similarity(doc1, doc2, file1=None, file2=None):
44
  text1 = ""
@@ -59,20 +72,25 @@ def similarity(doc1, doc2, file1=None, file2=None):
59
  else:
60
  return "Please provide either a DOCX file or paste the text for Document 2."
61
 
62
- return calculate_similarity(text1, text2)
 
63
 
64
  # Create a Gradio interface
65
  with gr.Blocks() as demo:
66
- gr.Markdown("## Document Similarity Checker")
 
 
67
  with gr.Row():
68
  with gr.Column():
69
- file1 = gr.File(label="Upload Document 1 (DOCX)")
70
- doc1 = gr.Textbox(label="Or Paste Text for Document 1", lines=10)
 
71
  with gr.Column():
72
- file2 = gr.File(label="Upload Document 2 (DOCX)")
73
- doc2 = gr.Textbox(label="Or Paste Text for Document 2", lines=10)
74
- output = gr.Textbox(label="Similarity Score")
75
- submit = gr.Button("Submit")
 
76
 
77
  submit.click(fn=similarity, inputs=[doc1, doc2, file1, file2], outputs=output)
78
 
 
39
  similarity_score = minhash1.jaccard(minhash2)
40
  return similarity_score
41
 
42
# Function to interpret similarity scores
def interpret_similarity(score):
    """Return a human-readable verdict for a similarity score.

    Args:
        score: Numeric similarity value. The bands below assume the usual
            0.0 (nothing shared) to 1.0 (identical) scale.

    Returns:
        One of five fixed messages:
        score >= 1.0 is an exact match, [0.8, 1.0) is high,
        [0.5, 0.8) is moderate, [0.2, 0.5) is low, and anything else
        (including negative values and NaN) is very low.
    """
    # Bands are checked from highest to lowest, so each entry only needs its
    # lower bound. Using ``>=`` for the top band (instead of ``== 1.0``)
    # keeps a score that lands marginally above 1.0 through float rounding
    # from falling through to the "Very Low" fallback.
    bands = (
        (1.0, "Exact Match! The documents are identical."),
        (0.8, "High Similarity: The documents are very similar."),
        (0.5, "Moderate Similarity: The documents share some content."),
        (0.2, "Low Similarity: The documents have limited overlap."),
    )
    for lower_bound, message in bands:
        if score >= lower_bound:
            return message
    return "Very Low Similarity: The documents are mostly different."
54
+
55
  # Function to handle the similarity calculation
56
  def similarity(doc1, doc2, file1=None, file2=None):
57
  text1 = ""
 
72
  else:
73
  return "Please provide either a DOCX file or paste the text for Document 2."
74
 
75
+ score = calculate_similarity(text1, text2)
76
+ return f"Similarity Score: {score:.2f}\n{interpret_similarity(score)}"
77
 
78
  # Create a Gradio interface
79
  with gr.Blocks() as demo:
80
+ gr.Markdown("## 📄 Document Similarity Checker")
81
+ gr.Markdown(
82
+ "Compare two documents by uploading DOCX files or pasting text. The app calculates similarity using MinHash and provides an interpretative score.")
83
  with gr.Row():
84
  with gr.Column():
85
+ gr.Markdown("### Document 1")
86
+ file1 = gr.File(label="Upload DOCX File")
87
+ doc1 = gr.Textbox(label="Or Paste Text Here", lines=10, placeholder="Paste document text...")
88
  with gr.Column():
89
+ gr.Markdown("### Document 2")
90
+ file2 = gr.File(label="Upload DOCX File")
91
+ doc2 = gr.Textbox(label="Or Paste Text Here", lines=10, placeholder="Paste document text...")
92
+ output = gr.Textbox(label="Result", lines=3)
93
+ submit = gr.Button("Check Similarity", variant="primary")
94
 
95
  submit.click(fn=similarity, inputs=[doc1, doc2, file1, file2], outputs=output)
96