Spaces:

SabkeSawaal68
/

ai-content-detector-advance

Running

App Files Files Community

ai-content-detector-advance / app.py

SabkeSawaal68

Update app.py

bc5dbea verified 5 months ago

raw

history blame contribute delete

3.42 kB

	import streamlit as st
	import torch
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import requests
	import re
	import numpy as np
	from fastapi import FastAPI
	from pydantic import BaseModel
	import uvicorn
	import threading

	# Load AI detection model
	MODEL_NAME = "roberta-base-openai-detector"


	# Streamlit re-executes this script on every widget interaction, so loading
	# the weights at module level would repeat the (slow) load on each re-run.
	# st.cache_resource keeps one shared tokenizer/model pair per process.
	# NOTE(review): st.cache_resource needs a reasonably recent Streamlit
	# release - confirm the pinned version provides it.
	@st.cache_resource
	def _load_detector(model_name):
	    """Load and return ``(tokenizer, model)`` for the given checkpoint name.

	    The tokenizer is loaded first and the sequence-classification model
	    second, matching the order the script has always used.
	    """
	    return (
	        AutoTokenizer.from_pretrained(model_name),
	        AutoModelForSequenceClassification.from_pretrained(model_name),
	    )


	tokenizer, model = _load_detector(MODEL_NAME)

	# FastAPI App
	# ASGI application object; the detection route is registered on it and it is
	# handed to uvicorn further down in this file.
	app = FastAPI()

	# API Input Model
	# Request body schema for the POST /api/detect route: a JSON object with a
	# single required string field. Comments are used instead of a docstring so
	# the generated schema description stays unchanged.
	class TextInput(BaseModel):
	    # The raw text to be analyzed.
	    text: str

	# AI content detection function
	def _ai_label_index(default=1):
	    """Return the logit index whose configured label names the AI class.

	    Looks the index up in ``model.config.id2label`` instead of trusting a
	    hard-coded position. Falls back to ``default`` (the index this file
	    originally used) when the config has no recognisable label.

	    NOTE(review): the upstream detector code appears to order its outputs as
	    (fake, real), which would make index 1 the *human* probability - confirm
	    against the checkpoint's config.
	    """
	    id2label = getattr(model.config, "id2label", None) or {}
	    for index, label in id2label.items():
	        if str(label).strip().lower() in ("fake", "ai", "ai-generated", "machine"):
	            return int(index)
	    return default


	def detect_ai_content(text):
	    """Return the classifier's probability that ``text`` is AI-generated.

	    Args:
	        text: The string to classify. Input longer than the tokenizer's
	            limit is truncated (``truncation=True``).

	    Returns:
	        A Python float: the softmax score of the AI-generated class for the
	        single input row.
	    """
	    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
	    # Inference only: skip autograd bookkeeping so no graph is kept per request.
	    with torch.no_grad():
	        outputs = model(**inputs)
	    scores = torch.nn.functional.softmax(outputs.logits, dim=1)
	    return scores[0][_ai_label_index()].item()  # AI probability

	# Writing style analysis
	def stylometry_analysis(text):
	    """Compute simple surface-level writing-style statistics for ``text``.

	    Args:
	        text: The string to analyze; words are obtained by whitespace split.

	    Returns:
	        A dict with three keys:
	        ``avg_word_length`` - mean number of characters per word,
	        ``complex_words_ratio`` - share of words longer than six characters,
	        ``passive_voice_count`` - number of "<auxiliary> <word ending in ed>"
	        matches (a rough heuristic; irregular participles are not detected).
	        Both ratios are 0 when the text contains no words.
	    """
	    words = text.split()
	    word_count = len(words)
	    if word_count:
	        avg_word_length = sum(len(word) for word in words) / word_count
	        complex_words_ratio = sum(1 for word in words if len(word) > 6) / word_count
	    else:
	        avg_word_length = 0
	        complex_words_ratio = 0

	    # The alternation must use bare "|": the previous pattern escaped every
	    # pipe, which made the regex look for literal "|" characters and so never
	    # match ordinary prose. The trailing \b keeps words that merely contain
	    # "ed" (e.g. "was medical") from being counted.
	    passive_voice_count = len(
	        re.findall(r'\b(?:is|was|were|has been|have been|had been)\b \w+ed\b', text)
	    )

	    return {
	        "avg_word_length": avg_word_length,
	        "complex_words_ratio": complex_words_ratio,
	        "passive_voice_count": passive_voice_count
	    }

	# Semantic similarity analysis
	_AI_KEYWORDS = ("AI", "generated", "neural network", "LLM", "GPT", "transformer")

	# Case-insensitive, whole-word match (optional plural "s"). The previous code
	# searched the lower-cased text for mixed-case keywords, so "AI", "LLM" and
	# "GPT" could never match; whole-word matching also keeps "ai" from matching
	# inside words such as "said" or "rain".
	_AI_KEYWORD_PATTERN = re.compile(
	    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _AI_KEYWORDS) + r")s?\b",
	    re.IGNORECASE,
	)


	def semantic_analysis(text):
	    """Return the density of AI-related keywords in ``text``.

	    Args:
	        text: The string to analyze.

	    Returns:
	        Number of keyword occurrences divided by the number of
	        whitespace-separated words, or 0 when the text contains no words.
	    """
	    word_count = len(text.split())
	    if not word_count:
	        return 0
	    ai_patterns = len(_AI_KEYWORD_PATTERN.findall(text))
	    return ai_patterns / word_count

	# FastAPI Route for AI Detection
	@app.post("/api/detect")
	def api_detect(data: TextInput):
	    """Run all three analyses on the submitted text and return a verdict.

	    The text is labelled "AI-Generated" when at least one signal fires:
	    model probability above 0.5, complex-word ratio above 0.4, or semantic
	    score above 0.2. Otherwise it is labelled "Human-Written".

	    Returns:
	        A dict with the rounded scores, the passive-voice count and the
	        final verdict string.
	    """
	    text = data.text
	    probability = detect_ai_content(text)
	    style = stylometry_analysis(text)
	    keyword_density = semantic_analysis(text)
	    complex_ratio = style["complex_words_ratio"]

	    # Any single signal crossing its threshold is enough to flag the text.
	    signals = (
	        probability > 0.5,
	        complex_ratio > 0.4,
	        keyword_density > 0.2,
	    )
	    if any(signals):
	        verdict = "AI-Generated"
	    else:
	        verdict = "Human-Written"

	    return {
	        "AI Probability": round(probability, 2),
	        "Complex Words Ratio": round(complex_ratio, 2),
	        "Passive Voice Count": style["passive_voice_count"],
	        "Semantic Score": round(keyword_density, 2),
	        "Final Verdict": verdict
	    }

	# Streamlit UI
	# Renders the page, and on "Analyze" posts the entered text to the local
	# detection endpoint and displays the returned scores.
	st.title("Self-Learning AI Content Detector")
	st.write("Detect AI-generated text and analyze writing patterns.")

	text_input = st.text_area("Enter text to analyze:")

	if st.button("Analyze"):
	    if text_input.strip():
	        try:
	            # A timeout is required: without one, requests waits forever if
	            # the background API thread is not (yet) accepting connections.
	            response = requests.post(
	                "http://127.0.0.1:8000/api/detect",
	                json={"text": text_input},
	                timeout=60,
	            )
	            # Surface 4xx/5xx replies here instead of failing later with a
	            # KeyError on a body that lacks the expected fields.
	            response.raise_for_status()
	            result = response.json()
	        except (requests.exceptions.RequestException, ValueError) as exc:
	            # ValueError covers a non-JSON body on older requests versions.
	            st.error(f"Detection service request failed: {exc}")
	        else:
	            st.subheader("Detection Results")
	            st.write(f"AI Probability: {result['AI Probability']:.2f}")
	            st.write(f"Complex Words Ratio: {result['Complex Words Ratio']:.2f}")
	            st.write(f"Passive Voice Count: {result['Passive Voice Count']}")
	            st.write(f"Semantic Score: {result['Semantic Score']:.2f}")
	            st.subheader(f"Final Verdict: {result['Final Verdict']}")
	    else:
	        st.error("Please enter some text for analysis.")

	# Running FastAPI in Background
	_API_THREAD_NAME = "fastapi-server"


	def run_fastapi():
	    """Serve the FastAPI app with uvicorn on port 8000 (blocks the caller)."""
	    uvicorn.run(app, host="0.0.0.0", port=8000)


	# Streamlit re-executes this script on every interaction. Without a guard,
	# each re-run would start another server thread that cannot bind port 8000.
	# threading.enumerate() lists only live threads, so a server thread that has
	# died is restarted on the next run.
	if not any(thread.name == _API_THREAD_NAME for thread in threading.enumerate()):
	    threading.Thread(target=run_fastapi, name=_API_THREAD_NAME, daemon=True).start()