plagrism_detection / backend.py
indhupamula's picture
Update backend.py
b2c7949 verified
from fastapi import FastAPI, HTTPException, UploadFile, File
import fitz # PyMuPDF for PDF handling
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import os
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
# Load the models once at import time so every request reuses them.
# Text-classification pipeline used by the /detect_ai endpoint.
ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")
# Sentence-embedding model used by check_text to compare submissions.
text_model = SentenceTransformer("all-MiniLM-L6-v2")
# CSV file holding previously checked texts (columns: content, plagiarism_score).
# The path is relative, so it resolves against the process working directory.
TEXT_DB = "text_plagiarism.csv"
def load_database():
    """Return the stored submissions as a DataFrame.

    If the CSV at ``TEXT_DB`` does not exist yet, an empty one with the
    ``content`` and ``plagiarism_score`` columns is created first, so the
    caller always gets a frame with both columns.

    Returns:
        pandas.DataFrame: one row per previously saved submission, with
        ``content`` always holding ``str`` values.
    """
    if not os.path.exists(TEXT_DB):
        pd.DataFrame(columns=["content", "plagiarism_score"]).to_csv(TEXT_DB, index=False)
    # With read_csv defaults, an empty cell or text such as "NA"/"null" comes
    # back as NaN and digit-only text comes back as a number. Pin the content
    # column to str and disable default NA parsing so saved text round-trips
    # unchanged.
    return pd.read_csv(TEXT_DB, dtype={"content": str}, keep_default_na=False)
def save_to_database(content, plagiarism_score):
    """Append one submission to the CSV store.

    The whole table is loaded, the new row is added at the end, and the file
    at ``TEXT_DB`` is rewritten.

    Args:
        content: the submitted text.
        plagiarism_score: the score computed for that text.
    """
    existing_rows = load_database()
    appended_row = pd.DataFrame(
        {"content": [content], "plagiarism_score": [plagiarism_score]}
    )
    combined = pd.concat([existing_rows, appended_row], ignore_index=True)
    combined.to_csv(TEXT_DB, index=False)
@app.post("/check_text")
def check_text(text: str):
    """Score ``text`` against every stored submission, then store it.

    The score is the highest cosine similarity between the embedding of
    ``text`` and the embeddings of all previously saved texts, multiplied by
    100. When nothing has been stored yet the score is 0. The submitted text
    is saved together with its score before returning.

    Args:
        text: the text to check.

    Returns:
        dict: ``{"plagiarism_score": <score>}``.

    Raises:
        HTTPException: status 500 if loading, encoding or saving fails.
    """
    try:
        # Text read back from the CSV can arrive as NaN (empty cells) or as
        # numbers (digit-only text). Drop the former and coerce the rest so
        # only strings reach the encoder.
        stored_texts = load_database()["content"].dropna().astype(str).tolist()
        if stored_texts:
            query_embedding = text_model.encode(text, convert_to_tensor=True)
            corpus_embeddings = text_model.encode(stored_texts, convert_to_tensor=True)
            # Result has shape (1, len(stored_texts)); take the single row.
            similarities = util.pytorch_cos_sim(query_embedding, corpus_embeddings).tolist()[0]
            highest_similarity = max(similarities) * 100
        else:
            highest_similarity = 0
        save_to_database(text, highest_similarity)
        return {"plagiarism_score": highest_similarity}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error checking plagiarism: {str(e)}") from e
@app.post("/detect_ai")
def detect_ai(text: str):
    """Classify ``text`` with the AI-detection pipeline.

    Args:
        text: the text to classify.

    Returns:
        dict: ``{"AI_Detection": <label>, "Confidence": <score>}`` taken from
        the first result the pipeline returns.

    Raises:
        HTTPException: status 500 if the pipeline call fails.
    """
    try:
        # truncation=True is forwarded to the tokenizer, so input longer than
        # the model's maximum sequence length is cut instead of raising.
        result = ai_detector(text, truncation=True)
        top_prediction = result[0]
        return {"AI_Detection": top_prediction["label"], "Confidence": top_prediction["score"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in AI Detection: {str(e)}") from e
@app.post("/upload_pdf")
async def upload_pdf(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and run the plagiarism check on it.

    The text of every page is joined with single spaces and passed to
    ``check_text``, whose result is returned unchanged.

    Args:
        file: the uploaded PDF.

    Returns:
        dict: whatever ``check_text`` returns for the extracted text.

    Raises:
        HTTPException: status 500 if the upload cannot be read or parsed as
        a PDF; errors raised by ``check_text`` are propagated as-is.
    """
    try:
        # Use the async read so the handler does not block on file I/O.
        pdf_content = await file.read()
        # The context manager closes the document even if extraction fails.
        with fitz.open(stream=pdf_content, filetype="pdf") as doc:
            text = " ".join(page.get_text("text") for page in doc)
        return check_text(text)
    except HTTPException:
        # Already a well-formed API error from check_text; do not re-wrap it
        # as a PDF processing failure.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"PDF Processing Error: {str(e)}") from e