indhupamula committed on
Commit
5bee5f1
·
verified ·
1 Parent(s): 1b968e9

Create plagrism.py

Browse files
Files changed (1) hide show
  1. plagrism.py +44 -0
plagrism.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import numpy as np
4
+ import pandas as pd
5
+ from sentence_transformers import SentenceTransformer, util
6
+
7
# Load the sentence-embedding model (MiniLM, a compact BERT-style encoder).
# Streamlit re-executes this whole script on every widget interaction, so the
# loader is wrapped in st.cache_resource: the model is built once per server
# process and reused across reruns instead of being re-created each time.
@st.cache_resource
def _load_model():
    """Return the shared SentenceTransformer instance used for encoding."""
    return SentenceTransformer('paraphrase-MiniLM-L6-v2')


model = _load_model()
9
+
10
# Function to check plagiarism
def check_plagiarism(text, stored_texts):
    """Return the highest similarity between *text* and the stored texts.

    Args:
        text: The document to check.
        stored_texts: Previously stored documents to compare against.
            Entries that are not strings (for example NaN values coming
            from an empty CSV cell) or that are blank are ignored.

    Returns:
        The largest cosine similarity between *text* and any stored
        document, scaled to a percentage, as a Python float. Returns 0.0
        when there is nothing usable to compare against.
    """
    corpus = [
        doc for doc in stored_texts if isinstance(doc, str) and doc.strip()
    ]
    if not corpus:
        # Without this guard the max over an empty similarity matrix raises,
        # which is exactly what happens on the first run with an empty
        # database.
        return 0.0

    embeddings = model.encode(corpus + [text], convert_to_tensor=True)
    similarity_matrix = util.pytorch_cos_sim(embeddings[-1], embeddings[:-1])

    # .item() extracts the scalar regardless of which device the tensor is
    # on, whereas .numpy() only works for CPU tensors.
    highest_similarity = float(similarity_matrix.max().item())
    return highest_similarity * 100
18
+
19
# Store past documents
# On first run there is no CSV yet; seed one that contains only the "text"
# header so that later reads always find a file with the expected column.
if not os.path.exists("database.csv"):
    pd.DataFrame(
        columns=["text"],
    ).to_csv(
        "database.csv",
        index=False,
    )
22
+
23
def load_database():
    """Return every stored document from ``database.csv`` as a list of str.

    The column is read strictly as text: pandas' default type inference
    would otherwise turn entries such as "007" into integers and entries
    such as "NA" or an empty cell into NaN floats, none of which can be
    passed on as documents. Blank rows are dropped.

    Returns:
        A list of the non-blank strings in the ``text`` column.
    """
    df = pd.read_csv("database.csv", dtype=str, keep_default_na=False)
    return [
        entry
        for entry in df["text"].tolist()
        if isinstance(entry, str) and entry.strip()
    ]
26
+
27
def save_to_database(text):
    """Append *text* as a new row of ``database.csv``.

    The row is appended instead of reading, concatenating and rewriting the
    whole file. Re-reading lets pandas re-infer types, which silently
    rewrites earlier rows (for example "007" becomes 7 and "NA" becomes an
    empty cell); appending leaves existing rows untouched and does not
    require loading the whole file.

    Args:
        text: The document to store.
    """
    # Write the header only when starting a new (missing or empty) file.
    needs_header = (
        not os.path.exists("database.csv")
        or os.path.getsize("database.csv") == 0
    )
    pd.DataFrame({"text": [text]}).to_csv(
        "database.csv",
        mode="a",
        header=needs_header,
        index=False,
    )
32
+
33
# Streamlit UI
st.title("Plagiarism Detection System using AI")
input_text = st.text_area("Enter text to check for plagiarism")

if st.button("Check Plagiarism"):
    if not input_text.strip():
        # A blank submission has nothing to compare and would otherwise be
        # scored and then stored in the database as an empty row.
        st.warning("Please enter some text before checking.")
    else:
        stored_texts = load_database()
        # With no stored documents there is nothing to compare against, so
        # the score is reported as zero rather than calling the checker.
        if stored_texts:
            similarity_score = check_plagiarism(input_text, stored_texts)
        else:
            similarity_score = 0.0
        st.write(f"Plagiarism Score: {similarity_score:.2f}%")

        # Only texts that look sufficiently original are kept as future
        # reference material.
        if similarity_score < 50:
            save_to_database(input_text)
            st.success("Text added to database for future reference.")