Update app.py
Browse files
app.py
CHANGED
@@ -4,14 +4,23 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
4 |
import requests
|
5 |
import re
|
6 |
import numpy as np
|
7 |
-
from
|
8 |
-
from
|
|
|
|
|
9 |
|
10 |
# Load AI detection model
|
11 |
MODEL_NAME = "roberta-base-openai-detector"
|
12 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
13 |
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# AI content detection function
|
16 |
def detect_ai_content(text):
|
17 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
@@ -37,64 +46,51 @@ def semantic_analysis(text):
|
|
37 |
ai_patterns = sum([text.lower().count(keyword) for keyword in keywords])
|
38 |
return ai_patterns / len(text.split()) if text.split() else 0
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
return
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
if ai_score < 0.5:
|
63 |
-
# Fine-tune model logic (this will need cloud-based model retraining)
|
64 |
-
pass
|
65 |
|
66 |
# Streamlit UI
|
67 |
st.title("Self-Learning AI Content Detector")
|
68 |
-
st.write("Detect AI-generated text and
|
69 |
|
70 |
text_input = st.text_area("Enter text to analyze:")
|
71 |
|
72 |
if st.button("Analyze"):
|
73 |
if text_input.strip():
|
74 |
-
|
75 |
-
|
76 |
-
semantic_score = semantic_analysis(text_input)
|
77 |
-
is_plagiarized = check_plagiarism(text_input)
|
78 |
-
|
79 |
-
# Multi-layer AI detection logic
|
80 |
-
is_ai_generated = (
|
81 |
-
ai_probability > 0.5 or
|
82 |
-
writing_features["complex_words_ratio"] > 0.4 or
|
83 |
-
semantic_score > 0.2 or
|
84 |
-
is_plagiarized
|
85 |
-
)
|
86 |
-
|
87 |
-
result = "AI-Generated" if is_ai_generated else "Human-Written"
|
88 |
|
89 |
st.subheader("Detection Results")
|
90 |
-
st.write(f"**AI Probability:** {
|
91 |
-
st.write(f"**Complex Words Ratio:** {
|
92 |
-
st.write(f"**Passive Voice Count:** {
|
93 |
-
st.write(f"**Semantic Score:** {
|
94 |
-
st.
|
95 |
-
st.subheader(f"Final Verdict: {result}")
|
96 |
-
|
97 |
-
# Auto-learn from new AI patterns
|
98 |
-
update_model_with_new_patterns()
|
99 |
else:
|
100 |
-
st.error("Please enter some text for analysis.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import requests
|
5 |
import re
|
6 |
import numpy as np
|
7 |
+
from fastapi import FastAPI
|
8 |
+
from pydantic import BaseModel
|
9 |
+
import uvicorn
|
10 |
+
import threading
|
11 |
|
12 |
# Load AI detection model
# Hugging Face model id of the AI-text classifier; tokenizer and weights are
# fetched from the hub on first run (requires network access) and then loaded
# once at import time so every request reuses the same model instance.
MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
|
16 |
|
17 |
+
# FastAPI App
app = FastAPI()

# API Input Model
class TextInput(BaseModel):
    """Request body for the /api/detect endpoint."""
    # Raw text to analyse for AI-generated content.
    text: str
|
23 |
+
|
24 |
# AI content detection function
|
25 |
def detect_ai_content(text):
|
26 |
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
|
|
46 |
ai_patterns = sum([text.lower().count(keyword) for keyword in keywords])
|
47 |
return ai_patterns / len(text.split()) if text.split() else 0
|
48 |
|
49 |
+
# FastAPI Route for AI Detection
@app.post("/api/detect")
def api_detect(data: TextInput):
    """Analyse the submitted text and return a multi-signal AI verdict.

    Combines three independent detectors — the transformer classifier,
    stylometric features, and the keyword-based semantic score — and labels
    the text "AI-Generated" as soon as any one signal crosses its threshold.
    """
    text = data.text
    ai_probability = detect_ai_content(text)
    writing_features = stylometry_analysis(text)
    semantic_score = semantic_analysis(text)

    # One trigger is enough: each tuple entry is a thresholded signal.
    signals = (
        ai_probability > 0.5,
        writing_features["complex_words_ratio"] > 0.4,
        semantic_score > 0.2,
    )
    verdict = "AI-Generated" if any(signals) else "Human-Written"

    return {
        "AI Probability": round(ai_probability, 2),
        "Complex Words Ratio": round(writing_features["complex_words_ratio"], 2),
        "Passive Voice Count": writing_features["passive_voice_count"],
        "Semantic Score": round(semantic_score, 2),
        "Final Verdict": verdict,
    }
|
|
|
|
|
|
|
71 |
|
72 |
# Streamlit UI
st.title("Self-Learning AI Content Detector")
st.write("Detect AI-generated text and analyze writing patterns.")

text_input = st.text_area("Enter text to analyze:")

if st.button("Analyze"):
    if text_input.strip():
        try:
            # The analysis is served by the local FastAPI app started below;
            # a timeout keeps the UI from hanging if the backend is down or
            # model inference stalls.
            response = requests.post(
                "http://127.0.0.1:8000/api/detect",
                json={"text": text_input},
                timeout=60,
            )
            # Surface HTTP errors (500s, etc.) instead of parsing bad JSON.
            response.raise_for_status()
            result = response.json()
        except requests.RequestException as exc:
            # Show a friendly error instead of a raw traceback in the app.
            st.error(f"Could not reach the analysis service: {exc}")
        else:
            st.subheader("Detection Results")
            st.write(f"**AI Probability:** {result['AI Probability']:.2f}")
            st.write(f"**Complex Words Ratio:** {result['Complex Words Ratio']:.2f}")
            st.write(f"**Passive Voice Count:** {result['Passive Voice Count']}")
            st.write(f"**Semantic Score:** {result['Semantic Score']:.2f}")
            st.subheader(f"Final Verdict: {result['Final Verdict']}")
    else:
        st.error("Please enter some text for analysis.")
|
91 |
+
|
92 |
+
# Running FastAPI in Background
def run_fastapi():
    """Serve the FastAPI app on port 8000 (blocking; run in a daemon thread)."""
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Streamlit re-executes this whole script on every user interaction, so an
# unguarded Thread(...).start() would spawn a new uvicorn server per rerun
# and fail with "address already in use". Persist a flag in session_state so
# the server thread is started at most once.
if not st.session_state.get("_fastapi_started", False):
    threading.Thread(target=run_fastapi, daemon=True).start()
    st.session_state["_fastapi_started"] = True