import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import requests
import re
import numpy as np
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import threading

# Load AI detection model
# Hugging Face model id passed to both from_pretrained() calls below.
MODEL_NAME = "roberta-base-openai-detector"
# Both objects are created when this module executes and are shared by
# detect_ai_content() as module-level globals.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# FastAPI App
# Route handlers below register on this instance; run_fastapi() serves it.
app = FastAPI()

# API Input Model
class TextInput(BaseModel):
    """Request body accepted by the POST /api/detect route."""
    # The text to analyze; api_detect() passes it to every analysis function.
    text: str

# AI content detection function
def detect_ai_content(text):
    """Return the classifier's probability that *text* is machine-generated.

    The text is tokenized (truncated to the tokenizer's maximum length), run
    through the module-level sequence-classification ``model`` and the logits
    are converted to probabilities with a softmax.

    Args:
        text: The string to classify.

    Returns:
        A float in [0, 1]: the probability assigned to the "Fake"
        (AI-generated) class.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(**inputs).logits
    probabilities = torch.nn.functional.softmax(logits, dim=1)

    # The OpenAI detector checkpoint orders its classes (fake, real), so the
    # AI-generated probability is NOT at index 1. Prefer the label mapping
    # shipped in the model config and fall back to index 0 if it is generic.
    label_to_index = getattr(model.config, "label2id", None) or {}
    fake_index = next(
        (int(index) for label, index in label_to_index.items()
         if str(label).lower() == "fake"),
        0,
    )
    return probabilities[0][fake_index].item()  # AI probability

# Writing style analysis
def stylometry_analysis(text):
    """Compute simple surface-level writing-style statistics for *text*.

    Words are whitespace-separated tokens, so attached punctuation counts
    toward a word's length.

    Args:
        text: The string to analyze.

    Returns:
        A dict with:
          - "avg_word_length": mean token length (0 for empty input)
          - "complex_words_ratio": share of tokens longer than 6 characters
            (0 for empty input)
          - "passive_voice_count": number of regex hits for an auxiliary
            ("is", "was", "were", "has/have/had been") followed by a word
            ending in "ed"
    """
    tokens = text.split()
    token_count = len(tokens)

    if token_count:
        mean_length = sum(map(len, tokens)) / token_count
        long_token_ratio = sum(1 for token in tokens if len(token) > 6) / token_count
    else:
        # No tokens: avoid dividing by zero and report neutral values.
        mean_length = 0
        long_token_ratio = 0

    passive_hits = re.findall(
        r'\b(is|was|were|has been|have been|had been)\b \w+ed', text
    )

    return {
        "avg_word_length": mean_length,
        "complex_words_ratio": long_token_ratio,
        "passive_voice_count": len(passive_hits),
    }

# AI-related keyword density analysis
def semantic_analysis(text):
    """Return the density of AI-related keywords in *text*.

    Keywords are matched case-insensitively as whole words (an optional
    plural "s" is accepted), so "GPT", "gpt" and "LLMs" all count while the
    letters "ai" inside "said" or "rain" do not.

    Args:
        text: The string to analyze.

    Returns:
        Number of keyword occurrences divided by the number of
        whitespace-separated words, or 0 when the text contains no words.
    """
    words = text.split()
    if not words:
        return 0

    keywords = ["AI", "generated", "neural network", "LLM", "GPT", "transformer"]
    # Matching must ignore case on both sides: lower-casing only the text
    # would make the upper-case keywords ("AI", "LLM", "GPT") unmatchable.
    pattern = r"\b(?:" + "|".join(re.escape(keyword) for keyword in keywords) + r")s?\b"
    ai_patterns = len(re.findall(pattern, text, flags=re.IGNORECASE))
    return ai_patterns / len(words)

# FastAPI Route for AI Detection
@app.post("/api/detect")
def api_detect(data: TextInput):
    """Analyze the submitted text and report whether it looks AI-generated.

    Combines three signals: the classifier probability from
    detect_ai_content(), the style statistics from stylometry_analysis()
    and the keyword score from semantic_analysis(). The text is labelled
    "AI-Generated" as soon as any one signal exceeds its threshold.

    Args:
        data: Request body carrying the text to analyze.

    Returns:
        A dict with the rounded scores, the passive-voice count and the
        final verdict string.
    """
    text = data.text
    probability = detect_ai_content(text)
    style_stats = stylometry_analysis(text)
    keyword_score = semantic_analysis(text)
    complex_ratio = style_stats["complex_words_ratio"]

    # One exceeded threshold is enough to flag the text.
    signals = (
        probability > 0.5,
        complex_ratio > 0.4,
        keyword_score > 0.2,
    )
    if any(signals):
        verdict = "AI-Generated"
    else:
        verdict = "Human-Written"

    return {
        "AI Probability": round(probability, 2),
        "Complex Words Ratio": round(complex_ratio, 2),
        "Passive Voice Count": style_stats["passive_voice_count"],
        "Semantic Score": round(keyword_score, 2),
        "Final Verdict": verdict,
    }

# Streamlit UI
# Renders a text box and, when "Analyze" is clicked, sends the text to the
# local /api/detect endpoint and displays the returned scores.
st.title("Self-Learning AI Content Detector")
st.write("Detect AI-generated text and analyze writing patterns.")

text_input = st.text_area("Enter text to analyze:")

if st.button("Analyze"):
    if text_input.strip():
        try:
            # A timeout keeps the page from hanging forever if the backend
            # is unresponsive; it is generous because inference can be slow.
            response = requests.post(
                "http://127.0.0.1:8000/api/detect",
                json={"text": text_input},
                timeout=60,
            )
            # Surface 4xx/5xx responses as errors instead of trying to read
            # result keys from an error payload.
            response.raise_for_status()
            result = response.json()
        except (requests.exceptions.RequestException, ValueError) as exc:
            # ValueError covers a non-JSON body on older requests versions.
            st.error(f"Could not get a result from the detection service: {exc}")
        else:
            st.subheader("Detection Results")
            st.write(f"**AI Probability:** {result['AI Probability']:.2f}")
            st.write(f"**Complex Words Ratio:** {result['Complex Words Ratio']:.2f}")
            st.write(f"**Passive Voice Count:** {result['Passive Voice Count']}")
            st.write(f"**Semantic Score:** {result['Semantic Score']:.2f}")
            st.subheader(f"Final Verdict: {result['Final Verdict']}")
    else:
        st.error("Please enter some text for analysis.")

# Running FastAPI in Background
def run_fastapi():
    """Serve the FastAPI ``app`` with uvicorn on port 8000.

    Blocks for as long as the server runs, so it is meant to be used as the
    target of a background thread.
    """
    # NOTE(review): host "0.0.0.0" listens on every network interface, which
    # exposes the unauthenticated API beyond this machine. The UI above only
    # calls 127.0.0.1 - confirm whether external access is intended.
    uvicorn.run(app, host="0.0.0.0", port=8000)


# Streamlit re-executes this script on every user interaction. Without a
# guard each rerun would spawn another server thread that fails to bind the
# already-used port, so start the server only if our named thread is not
# already alive in this process.
_SERVER_THREAD_NAME = "fastapi-background-server"
if not any(thread.name == _SERVER_THREAD_NAME for thread in threading.enumerate()):
    threading.Thread(
        target=run_fastapi, name=_SERVER_THREAD_NAME, daemon=True
    ).start()