# Spaces: Running
import os

import fitz  # PyMuPDF for PDF handling
import pandas as pd
from fastapi import FastAPI, File, HTTPException, UploadFile
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
# FastAPI application instance.
app = FastAPI()

# CSV file holding every text checked so far together with its score.
TEXT_DB = "text_plagiarism.csv"

# Text-classification pipeline used by detect_ai().
ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")

# Sentence-embedding model used by check_text() for similarity scoring.
text_model = SentenceTransformer("all-MiniLM-L6-v2")
def load_database():
    """Read the CSV at ``TEXT_DB`` into a DataFrame.

    When the file does not exist yet, a header-only CSV with the columns
    ``content`` and ``plagiarism_score`` is written first, so the read
    that follows always has a file to open.

    Returns:
        The DataFrame produced by ``pd.read_csv(TEXT_DB)``.
    """
    db_missing = not os.path.exists(TEXT_DB)
    if db_missing:
        header_only = pd.DataFrame(columns=["content", "plagiarism_score"])
        header_only.to_csv(TEXT_DB, index=False)
    return pd.read_csv(TEXT_DB)
def save_to_database(content, plagiarism_score):
    """Append one ``(content, plagiarism_score)`` row to the CSV at ``TEXT_DB``.

    The whole file is loaded, the new row is concatenated at the end, and
    the result is written back over the file without an index column.

    Args:
        content: Value stored in the ``content`` column.
        plagiarism_score: Value stored in the ``plagiarism_score`` column.
    """
    existing_rows = load_database()
    appended_row = pd.DataFrame(
        {"content": [content], "plagiarism_score": [plagiarism_score]}
    )
    combined = pd.concat([existing_rows, appended_row], ignore_index=True)
    combined.to_csv(TEXT_DB, index=False)
def check_text(text: str):
    """Score ``text`` against the stored texts and record it in the database.

    The text is embedded with ``text_model`` and compared by cosine
    similarity with every text already stored in the CSV database. The
    highest similarity, scaled to a percentage, is the score. The text and
    its score are then appended to the database.

    Args:
        text: The text to check.

    Returns:
        ``{"plagiarism_score": <score>}`` where the score is the highest
        cosine similarity times 100, or ``0`` when nothing is stored yet.

    Raises:
        HTTPException: Status 500 if loading, encoding, or saving fails.
    """
    try:
        # read_csv turns empty or NA-like cells ("", "NA", "null", ...) into
        # NaN floats and may infer a numeric dtype for digit-only content.
        # The encoder needs strings, so drop missing cells and coerce the
        # rest to str before embedding.
        stored_texts = load_database()["content"].dropna().astype(str).tolist()
        if stored_texts:
            query_embedding = text_model.encode(text, convert_to_tensor=True)
            stored_embeddings = text_model.encode(stored_texts, convert_to_tensor=True)
            # One query row against N stored rows gives a 1 x N matrix;
            # row 0 holds the similarity to each stored text.
            similarities = util.pytorch_cos_sim(query_embedding, stored_embeddings).tolist()[0]
            highest_similarity = max(similarities) * 100
        else:
            highest_similarity = 0
        save_to_database(text, highest_similarity)
        return {"plagiarism_score": highest_similarity}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error checking plagiarism: {str(e)}") from e
def detect_ai(text: str):
    """Classify ``text`` with the ``ai_detector`` pipeline.

    Args:
        text: The text to classify.

    Returns:
        ``{"AI_Detection": <label>, "Confidence": <score>}`` taken from the
        first result returned by the pipeline.

    Raises:
        HTTPException: Status 500 if the pipeline call fails.
    """
    try:
        # The underlying model accepts a bounded number of tokens; without
        # truncation, longer inputs make the pipeline raise instead of
        # classifying the leading part of the text.
        result = ai_detector(text, truncation=True)
        top = result[0]
        return {"AI_Detection": top["label"], "Confidence": top["score"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in AI Detection: {str(e)}") from e
async def upload_pdf(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and run ``check_text`` on it.

    The text of every page is extracted with PyMuPDF and joined with single
    spaces before being checked.

    NOTE(review): no route decorator is visible on this function in this
    file; confirm it is registered on ``app`` somewhere.

    Args:
        file: The uploaded PDF.

    Returns:
        The dictionary returned by ``check_text`` for the extracted text.

    Raises:
        HTTPException: Status 500 if the PDF cannot be read or parsed, or
            the error raised by ``check_text``, passed through unchanged.
    """
    try:
        # Use the async read so the event loop is not blocked on file I/O.
        pdf_content = await file.read()
        # The context manager closes the document even if extraction fails.
        with fitz.open(stream=pdf_content, filetype="pdf") as doc:
            text = " ".join(page.get_text("text") for page in doc)
        return check_text(text)
    except HTTPException:
        # check_text already produced a well-formed HTTP error; re-wrapping
        # it would mislabel a plagiarism-check failure as a PDF failure.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"PDF Processing Error: {str(e)}") from e