SabkeSawaal68 committed (verified)
Commit bc5dbea · 1 Parent(s): 357c48d

Update app.py

Files changed (1)
  1. app.py +48 -52
app.py CHANGED
@@ -4,14 +4,23 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import requests
 import re
 import numpy as np
-from bs4 import BeautifulSoup
-from datasets import load_dataset
+from fastapi import FastAPI
+from pydantic import BaseModel
+import uvicorn
+import threading
 
 # Load AI detection model
 MODEL_NAME = "roberta-base-openai-detector"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
 
+# FastAPI App
+app = FastAPI()
+
+# API Input Model
+class TextInput(BaseModel):
+    text: str
+
 # AI content detection function
 def detect_ai_content(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
@@ -37,64 +46,51 @@ def semantic_analysis(text):
     ai_patterns = sum([text.lower().count(keyword) for keyword in keywords])
     return ai_patterns / len(text.split()) if text.split() else 0
 
-# Web crawling for plagiarism detection
-def check_plagiarism(text):
-    search_url = f"https://www.google.com/search?q={'+'.join(text.split()[:10])}"
-    headers = {"User-Agent": "Mozilla/5.0"}
-    response = requests.get(search_url, headers=headers)
-    if response.status_code == 200:
-        soup = BeautifulSoup(response.text, "html.parser")
-        links = [a["href"] for a in soup.find_all("a", href=True)]
-        return any("wikipedia" in link or "researchgate" in link or "arxiv" in link for link in links)
-    return False
-
-# Self-learning system (Internet AI Content Collection)
-def collect_ai_generated_text():
-    dataset = load_dataset("openai/webgpt_comparisons", split="train")
-    ai_text_samples = [item["answer_1"] for item in dataset.select(range(50))]
-    return ai_text_samples
-
-# Auto-learn from new AI models
-def update_model_with_new_patterns():
-    new_ai_texts = collect_ai_generated_text()
-    for text in new_ai_texts:
-        ai_score = detect_ai_content(text)
-        if ai_score < 0.5:
-            # Fine-tune model logic (this will need cloud-based model retraining)
-            pass
+# FastAPI Route for AI Detection
+@app.post("/api/detect")
+def api_detect(data: TextInput):
+    ai_probability = detect_ai_content(data.text)
+    writing_features = stylometry_analysis(data.text)
+    semantic_score = semantic_analysis(data.text)
+
+    is_ai_generated = (
+        ai_probability > 0.5 or
+        writing_features["complex_words_ratio"] > 0.4 or
+        semantic_score > 0.2
+    )
+
+    result = "AI-Generated" if is_ai_generated else "Human-Written"
+
+    return {
+        "AI Probability": round(ai_probability, 2),
+        "Complex Words Ratio": round(writing_features["complex_words_ratio"], 2),
+        "Passive Voice Count": writing_features["passive_voice_count"],
+        "Semantic Score": round(semantic_score, 2),
+        "Final Verdict": result
+    }
 
 # Streamlit UI
 st.title("Self-Learning AI Content Detector")
-st.write("Detect AI-generated text and continuously learn from new AI models.")
+st.write("Detect AI-generated text and analyze writing patterns.")
 
 text_input = st.text_area("Enter text to analyze:")
 
 if st.button("Analyze"):
     if text_input.strip():
-        ai_probability = detect_ai_content(text_input)
-        writing_features = stylometry_analysis(text_input)
-        semantic_score = semantic_analysis(text_input)
-        is_plagiarized = check_plagiarism(text_input)
-
-        # Multi-layer AI detection logic
-        is_ai_generated = (
-            ai_probability > 0.5 or
-            writing_features["complex_words_ratio"] > 0.4 or
-            semantic_score > 0.2 or
-            is_plagiarized
-        )
-
-        result = "AI-Generated" if is_ai_generated else "Human-Written"
+        response = requests.post("http://127.0.0.1:8000/api/detect", json={"text": text_input})
+        result = response.json()
 
         st.subheader("Detection Results")
-        st.write(f"**AI Probability:** {ai_probability:.2f}")
-        st.write(f"**Complex Words Ratio:** {writing_features['complex_words_ratio']:.2f}")
-        st.write(f"**Passive Voice Count:** {writing_features['passive_voice_count']}")
-        st.write(f"**Semantic Score:** {semantic_score:.2f}")
-        st.write(f"**Plagiarism Detected:** {'Yes' if is_plagiarized else 'No'}")
-        st.subheader(f"Final Verdict: {result}")
-
-        # Auto-learn from new AI patterns
-        update_model_with_new_patterns()
     else:
-        st.error("Please enter some text for analysis.")
+        st.error("Please enter some text for analysis.")
+
+# Running FastAPI in Background
+def run_fastapi():
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+
+threading.Thread(target=run_fastapi, daemon=True).start()
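With this revision the Streamlit UI no longer calls the detectors in-process; it round-trips through the new FastAPI route instead. For reference, a minimal smoke test against that route, assuming the updated app.py is running and the background uvicorn server is reachable on localhost:8000 (the sample text and the printed response are illustrative only):

    import requests

    # Hypothetical smoke test for the /api/detect route added in this commit.
    # Assumes app.py has already started the background uvicorn server on port 8000.
    payload = {"text": "Furthermore, it is important to note that this analysis delves into key aspects."}
    resp = requests.post("http://127.0.0.1:8000/api/detect", json=payload, timeout=30)
    resp.raise_for_status()

    print(resp.json())
    # Illustrative shape of the response:
    # {"AI Probability": 0.91, "Complex Words Ratio": 0.27,
    #  "Passive Voice Count": 0, "Semantic Score": 0.12,
    #  "Final Verdict": "AI-Generated"}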
 
 
 
 
 
 
 
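One caveat on the startup block at the bottom of the new file: Streamlit re-executes app.py from top to bottom on every user interaction, so the module-level threading.Thread(target=run_fastapi, daemon=True).start() fires again on each rerun, and the repeated uvicorn.run calls will hit an address-already-in-use error once port 8000 is bound. A sketch of one way to guard against this, assuming a Streamlit version that provides st.cache_resource (the start_fastapi_once helper is illustrative, not part of this commit):

    import threading
    import uvicorn
    import streamlit as st

    @st.cache_resource  # cached once per server process, not once per rerun
    def start_fastapi_once():
        # `app` is the FastAPI() instance defined earlier in app.py.
        thread = threading.Thread(
            target=lambda: uvicorn.run(app, host="0.0.0.0", port=8000),
            daemon=True,
        )
        thread.start()
        return thread

    start_fastapi_once()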