|
import streamlit as st |
|
import torch |
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
import requests |
|
import re |
|
import numpy as np |
|
from fastapi import FastAPI |
|
from pydantic import BaseModel |
|
import uvicorn |
|
import threading |
|
|
|
|
|
# Hugging Face model id of the sequence classifier used for detection.
MODEL_NAME = "roberta-base-openai-detector"


@st.cache_resource
def _load_detector(model_name):
    """Load the tokenizer and sequence classifier for *model_name*.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the loaded objects as a resource keeps the model
    from being re-instantiated on each of those reruns.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )


# Module-level names are kept unchanged: the detection code reads them directly.
tokenizer, model = _load_detector(MODEL_NAME)

# ASGI application that exposes the detection endpoint.
app = FastAPI()
|
|
|
|
|
# Request body for the detection endpoint: a JSON object with one string
# field. Documented with comments (not a docstring) so the generated API
# schema stays exactly as it was.
class TextInput(BaseModel):
    # Raw passage submitted for analysis.
    text: str
|
|
|
|
|
def detect_ai_content(text):
    """Return the classifier's softmax probability for label index 1.

    Args:
        text: Passage to score. Input longer than the tokenizer's limit is
            truncated (``truncation=True``).

    Returns:
        float: Probability assigned to label index 1, which the caller in
        this file treats as the likelihood that *text* is AI-generated.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: disabling autograd avoids building a gradient graph
    # (and holding its activations) for every request.
    with torch.no_grad():
        logits = model(**inputs).logits

    probabilities = torch.nn.functional.softmax(logits, dim=1)

    # NOTE(review): index 1 is assumed to be the machine-generated class.
    # The upstream detector appears to order its outputs (fake, real), which
    # would make index 0 the AI class -- confirm against
    # model.config.id2label before trusting this score.
    return probabilities[0][1].item()
|
|
|
|
|
# Rough passive-voice heuristic: an auxiliary followed by a word ending in
# "ed". The trailing \b matters: without it "was feeding" matched on the
# "feed" prefix and was counted as passive.
_PASSIVE_VOICE_RE = re.compile(
    r"\b(?:is|are|was|were|has been|have been|had been)\b \w+ed\b"
)


def stylometry_analysis(text):
    """Compute simple surface-level writing statistics for *text*.

    Words are whitespace-separated tokens, so attached punctuation counts
    toward a word's length.

    Args:
        text: Passage to analyse.

    Returns:
        dict with three keys:
            ``avg_word_length``: mean token length (0.0 for empty input).
            ``complex_words_ratio``: share of tokens longer than six
                characters (0.0 for empty input).
            ``passive_voice_count``: number of matches of the
                passive-voice heuristic.
    """
    words = text.split()
    word_count = len(words)

    if word_count:
        avg_word_length = sum(map(len, words)) / word_count
        complex_words_ratio = sum(len(word) > 6 for word in words) / word_count
    else:
        # No tokens: avoid dividing by zero.
        avg_word_length = 0.0
        complex_words_ratio = 0.0

    passive_voice_count = len(_PASSIVE_VOICE_RE.findall(text))

    return {
        "avg_word_length": avg_word_length,
        "complex_words_ratio": complex_words_ratio,
        "passive_voice_count": passive_voice_count,
    }
|
|
|
|
|
# Case-insensitive keyword matcher. "AI" and "LLM" must match as whole words
# (optionally plural) because as bare substrings they occur inside ordinary
# words such as "said" or "fulfillment". The longer keywords keep substring
# matching so inflected forms ("transformers", "neural networks") still count.
_AI_KEYWORD_RE = re.compile(
    r"\b(?:ai|llm)s?\b|generated|neural network|gpt|transformer",
    re.IGNORECASE,
)


def semantic_analysis(text):
    """Return the density of AI-related keywords in *text*.

    The previous implementation searched lower-cased text for the
    upper-case keywords "AI", "LLM" and "GPT", so those could never match.
    Matching is now case-insensitive.

    Args:
        text: Passage to analyse.

    Returns:
        float: Number of keyword occurrences divided by the number of
        whitespace-separated words, or 0.0 when *text* has no words.
    """
    word_count = len(text.split())
    if not word_count:
        return 0.0

    keyword_hits = sum(1 for _ in _AI_KEYWORD_RE.finditer(text))
    return keyword_hits / word_count
|
|
|
|
|
# POST /api/detect
# Runs the classifier, the stylometry statistics and the keyword-density
# check on the submitted text, then returns the rounded scores together with
# an overall verdict. Documented with comments (not a docstring) so the
# generated API description stays exactly as it was.
@app.post("/api/detect")
def api_detect(data: TextInput):
    submitted = data.text

    probability = detect_ai_content(submitted)
    style_stats = stylometry_analysis(submitted)
    keyword_density = semantic_analysis(submitted)
    complex_ratio = style_stats["complex_words_ratio"]

    # Any single signal above its threshold is enough to flag the text.
    if probability > 0.5 or complex_ratio > 0.4 or keyword_density > 0.2:
        verdict = "AI-Generated"
    else:
        verdict = "Human-Written"

    return {
        "AI Probability": round(probability, 2),
        "Complex Words Ratio": round(complex_ratio, 2),
        "Passive Voice Count": style_stats["passive_voice_count"],
        "Semantic Score": round(keyword_density, 2),
        "Final Verdict": verdict,
    }
|
|
|
|
|
# --- Streamlit front end -----------------------------------------------------
# Collects text from the user, posts it to the local detection endpoint and
# renders the returned scores.
st.title("Self-Learning AI Content Detector")
st.write("Detect AI-generated text and analyze writing patterns.")

text_input = st.text_area("Enter text to analyze:")

if st.button("Analyze"):
    if not text_input.strip():
        st.error("Please enter some text for analysis.")
    else:
        try:
            # The timeout keeps the page from hanging indefinitely when the
            # backend is not (yet) accepting connections.
            response = requests.post(
                "http://127.0.0.1:8000/api/detect",
                json={"text": text_input},
                timeout=30,
            )
            # Surface HTTP errors here instead of failing later on a
            # missing key in the payload.
            response.raise_for_status()
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a body that is not valid JSON.
            st.error(f"Detection service request failed: {exc}")
        else:
            st.subheader("Detection Results")
            st.write(f"**AI Probability:** {result['AI Probability']:.2f}")
            st.write(f"**Complex Words Ratio:** {result['Complex Words Ratio']:.2f}")
            st.write(f"**Passive Voice Count:** {result['Passive Voice Count']}")
            st.write(f"**Semantic Score:** {result['Semantic Score']:.2f}")
            st.subheader(f"Final Verdict: {result['Final Verdict']}")
|
|
|
|
|
# Name given to the background server thread so later script runs can find it.
_API_THREAD_NAME = "fastapi-detector-server"


def run_fastapi(host="0.0.0.0", port=8000):
    """Serve the FastAPI ``app`` with uvicorn; blocks until the server stops.

    Args:
        host: Interface to bind. Defaults to all interfaces, as before.
        port: TCP port to listen on.
    """
    # NOTE(review): the UI in this file only calls 127.0.0.1, so binding all
    # interfaces exposes the endpoint more widely than the UI needs --
    # confirm whether remote access is intended.
    uvicorn.run(app, host=host, port=port)


# Streamlit re-executes this script on every interaction. Without this check
# each rerun would spawn another server thread that tries to bind the same
# port. Start one only when no live server thread exists in this process.
if not any(
    worker.name == _API_THREAD_NAME and worker.is_alive()
    for worker in threading.enumerate()
):
    threading.Thread(
        target=run_fastapi,
        name=_API_THREAD_NAME,
        daemon=True,
    ).start()