"""Streamlit UI plus FastAPI backend for a heuristic AI-generated-text detector.

The script is meant to be launched with ``streamlit run``.  Streamlit
re-executes the whole file on every user interaction, so the expensive
one-time work (loading the model, starting the API server thread) is wrapped
in ``st.cache_resource`` and therefore happens once per process.
"""

import re
import threading

import numpy as np
import requests
import streamlit as st
import torch
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hugging Face identifier of the AI detection model.
MODEL_NAME = "roberta-base-openai-detector"

# Address the background API server binds to, and the URL the UI calls.
# NOTE(review): 0.0.0.0 exposes the API on every network interface; switch to
# 127.0.0.1 if only the local Streamlit UI is supposed to reach it.
API_HOST = "0.0.0.0"
API_PORT = 8000
API_URL = "http://127.0.0.1:8000/api/detect"
REQUEST_TIMEOUT_SECONDS = 60

# "to be" auxiliary followed by a word ending in "ed" (rough passive-voice cue).
_PASSIVE_VOICE_RE = re.compile(
    r'\b(is|was|were|has been|have been|had been)\b \w+ed'
)

# Terms whose density is used as a crude "talks about AI" signal.
_AI_KEYWORDS = ("AI", "generated", "neural network", "LLM", "GPT", "transformer")
# Whole-word, case-insensitive match with an optional plural "s", so "AI" does
# not fire inside words such as "said" while "transformers" still counts.
_AI_KEYWORD_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _AI_KEYWORDS) + r")s?\b",
    re.IGNORECASE,
)


@st.cache_resource
def _load_detector():
    """Load the tokenizer and classifier once per process.

    Returns:
        A ``(tokenizer, model)`` tuple with the model switched to eval mode.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    loaded_model.eval()
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_detector()


def _ai_label_index():
    """Return the logit index of the machine-generated ("Fake") class.

    The index is looked up in ``model.config.id2label``.  If no label named
    "fake" is present, index 0 is used.
    NOTE(review): the fallback assumes the (fake, real) class order of
    OpenAI's reference detector -- confirm against the model card.
    """
    id2label = getattr(model.config, "id2label", None) or {}
    for index, label in id2label.items():
        if str(label).lower() == "fake":
            return int(index)
    return 0


AI_LABEL_INDEX = _ai_label_index()

# FastAPI App
app = FastAPI()


# API Input Model
class TextInput(BaseModel):
    """Request body for ``/api/detect``: the text to analyze."""

    text: str


# AI content detection function
def detect_ai_content(text):
    """Return the classifier's probability that ``text`` is AI-generated.

    Args:
        text: The text to classify; it is truncated by the tokenizer if it
            exceeds the model's maximum length.

    Returns:
        A float in [0, 1]: the softmax score of the machine-generated class.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.nn.functional.softmax(outputs.logits, dim=1)
    return scores[0][AI_LABEL_INDEX].item()  # AI probability


# Writing style analysis
def stylometry_analysis(text):
    """Compute simple surface statistics of ``text``.

    Args:
        text: The text to analyze.

    Returns:
        A dict with ``avg_word_length`` (mean characters per
        whitespace-separated word), ``complex_words_ratio`` (share of words
        longer than six characters) and ``passive_voice_count`` (number of
        regex matches for an auxiliary followed by an "-ed" word).  Both
        ratios are 0 when the text contains no words.
    """
    words = text.split()
    word_count = len(words)
    if word_count:
        avg_word_length = sum(len(word) for word in words) / word_count
        complex_words_ratio = sum(1 for word in words if len(word) > 6) / word_count
    else:
        avg_word_length = 0
        complex_words_ratio = 0
    passive_voice_count = len(_PASSIVE_VOICE_RE.findall(text))
    return {
        "avg_word_length": avg_word_length,
        "complex_words_ratio": complex_words_ratio,
        "passive_voice_count": passive_voice_count,
    }


# Semantic similarity analysis
def semantic_analysis(text):
    """Return the density of AI-related keywords in ``text``.

    Matching is case-insensitive and on whole words (optionally pluralised),
    so "AI", "ai" and "LLMs" all count but "said" does not.

    Args:
        text: The text to analyze.

    Returns:
        Keyword occurrences divided by the number of whitespace-separated
        words, or 0 for text without words.
    """
    words = text.split()
    if not words:
        return 0
    return len(_AI_KEYWORD_RE.findall(text)) / len(words)


# FastAPI Route for AI Detection
@app.post("/api/detect")
def api_detect(data: TextInput):
    """Analyze ``data.text`` and return the scores plus a final verdict.

    The text is labelled "AI-Generated" when any one of the three signals
    crosses its threshold (model probability > 0.5, complex-word ratio > 0.4,
    keyword density > 0.2); otherwise it is labelled "Human-Written".
    """
    ai_probability = detect_ai_content(data.text)
    writing_features = stylometry_analysis(data.text)
    semantic_score = semantic_analysis(data.text)

    is_ai_generated = (
        ai_probability > 0.5
        or writing_features["complex_words_ratio"] > 0.4
        or semantic_score > 0.2
    )
    result = "AI-Generated" if is_ai_generated else "Human-Written"

    return {
        "AI Probability": round(ai_probability, 2),
        "Complex Words Ratio": round(writing_features["complex_words_ratio"], 2),
        "Passive Voice Count": writing_features["passive_voice_count"],
        "Semantic Score": round(semantic_score, 2),
        "Final Verdict": result,
    }


# Running FastAPI in Background
def run_fastapi():
    """Serve ``app`` with uvicorn; blocks until the server stops."""
    uvicorn.run(app, host=API_HOST, port=API_PORT)


@st.cache_resource
def _start_api_server():
    """Start the API server thread once per process and return it.

    Cached so that Streamlit reruns do not spawn additional servers that
    would fail to bind the already-used port.
    """
    server_thread = threading.Thread(target=run_fastapi, daemon=True)
    server_thread.start()
    return server_thread


# Start the backend before rendering the UI that depends on it.
_start_api_server()

# Streamlit UI
st.title("Self-Learning AI Content Detector")
st.write("Detect AI-generated text and analyze writing patterns.")

text_input = st.text_area("Enter text to analyze:")

if st.button("Analyze"):
    if text_input.strip():
        try:
            response = requests.post(
                API_URL,
                json={"text": text_input},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            result = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Covers connection errors, timeouts, HTTP errors and bad JSON.
            st.error(f"Detection request failed: {exc}")
        else:
            st.subheader("Detection Results")
            st.write(f"**AI Probability:** {result['AI Probability']:.2f}")
            st.write(f"**Complex Words Ratio:** {result['Complex Words Ratio']:.2f}")
            st.write(f"**Passive Voice Count:** {result['Passive Voice Count']}")
            st.write(f"**Semantic Score:** {result['Semantic Score']:.2f}")
            st.subheader(f"Final Verdict: {result['Final Verdict']}")
    else:
        st.error("Please enter some text for analysis.")