import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import requests
import re
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import threading

# Load AI detection model
MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# FastAPI App
app = FastAPI()

# API Input Model
class TextInput(BaseModel):
    text: str

# AI content detection function
def detect_ai_content(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only, so skip gradient tracking
        outputs = model(**inputs)
    scores = torch.nn.functional.softmax(outputs.logits, dim=1)
    # In this detector's label order, 0 = "Fake" (AI-generated) and 1 = "Real",
    # so the AI probability is the score at index 0
    return scores[0][0].item()

# Writing style analysis
def stylometry_analysis(text):
    words = text.split()
    avg_word_length = sum(len(word) for word in words) / len(words) if words else 0
    complex_words_ratio = len([word for word in words if len(word) > 6]) / len(words) if words else 0
    # Rough passive-voice heuristic: a form of "to be" followed by an -ed word
    passive_voice_count = len(re.findall(r'\b(is|was|were|has been|have been|had been)\b \w+ed\b', text, flags=re.IGNORECASE))
    return {
        "avg_word_length": avg_word_length,
        "complex_words_ratio": complex_words_ratio,
        "passive_voice_count": passive_voice_count
    }
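
# Illustrative check (hand-computed): stylometry_analysis("The report was reviewed by the team")
# returns avg_word_length ≈ 4.14, complex_words_ratio ≈ 0.14 (only "reviewed"
# has more than six letters), and passive_voice_count = 1 ("was reviewed").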

# Semantic similarity analysis (keyword-frequency heuristic)
def semantic_analysis(text):
    keywords = ["AI", "generated", "neural network", "LLM", "GPT", "transformer"]
    # Match whole words/phrases so that e.g. "maintain" is not counted as "AI"
    ai_patterns = sum(
        len(re.findall(r'\b' + re.escape(keyword.lower()) + r'\b', text.lower()))
        for keyword in keywords
    )
    return ai_patterns / len(text.split()) if text.split() else 0

# FastAPI Route for AI Detection
@app.post("/api/detect")
def api_detect(data: TextInput):
    ai_probability = detect_ai_content(data.text)
    writing_features = stylometry_analysis(data.text)
    semantic_score = semantic_analysis(data.text)

    is_ai_generated = (
        ai_probability > 0.5 or 
        writing_features["complex_words_ratio"] > 0.4 or 
        semantic_score > 0.2
    )

    result = "AI-Generated" if is_ai_generated else "Human-Written"

    return {
        "AI Probability": round(ai_probability, 2),
        "Complex Words Ratio": round(writing_features["complex_words_ratio"], 2),
        "Passive Voice Count": writing_features["passive_voice_count"],
        "Semantic Score": round(semantic_score, 2),
        "Final Verdict": result
    }
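
# Example call against this route (assuming the server configured below is
# running locally on port 8000):
#   curl -X POST http://127.0.0.1:8000/api/detect \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Sample text to analyze."}'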

# Streamlit UI
st.title("Self-Learning AI Content Detector")
st.write("Detect AI-generated text and analyze writing patterns.")

text_input = st.text_area("Enter text to analyze:")

if st.button("Analyze"):
    if text_input.strip():
        try:
            response = requests.post(
                "http://127.0.0.1:8000/api/detect",
                json={"text": text_input},
                timeout=30,  # avoid hanging if the background server is not up yet
            )
            response.raise_for_status()
            result = response.json()

            st.subheader("Detection Results")
            st.write(f"**AI Probability:** {result['AI Probability']:.2f}")
            st.write(f"**Complex Words Ratio:** {result['Complex Words Ratio']:.2f}")
            st.write(f"**Passive Voice Count:** {result['Passive Voice Count']}")
            st.write(f"**Semantic Score:** {result['Semantic Score']:.2f}")
            st.subheader(f"Final Verdict: {result['Final Verdict']}")
        except requests.RequestException as exc:
            st.error(f"Could not reach the detection API: {exc}")
    else:
        st.error("Please enter some text for analysis.")

# Running FastAPI in Background
def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Streamlit re-runs this script on every interaction; guard the thread start
# so uvicorn does not try to bind port 8000 more than once per session
if not st.session_state.get("fastapi_started"):
    threading.Thread(target=run_fastapi, daemon=True).start()
    st.session_state["fastapi_started"] = True
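
# To try this out (a sketch; assumes this file is saved as app.py):
#   streamlit run app.py
# Streamlit serves the UI while uvicorn exposes the JSON API on port 8000.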