Spaces:

SabkeSawaal68
/

ai-content-detector-advance

Running

App Files Files Community

SabkeSawaal68 committed on Feb 7

Commit

10ae628

verified ·

1 Parent(s): cb58b29

Create app.py

Browse files

Files changed (1) hide show

app.py +65 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import streamlit as st
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import requests
+import re
+from bs4 import BeautifulSoup
+# Hugging Face AI Model
+MODEL_NAME = "roberta-base-openai-detector"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+# AI Content Detection Function
+def detect_ai_content(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    outputs = model(**inputs)
+    scores = torch.nn.functional.softmax(outputs.logits, dim=1)
+    return scores[0][1].item()
+# Writing Style Analysis
+def stylometry_analysis(text):
+    words = text.split()
+    avg_word_length = sum(len(word) for word in words) / len(words) if words else 0
+    complex_words_ratio = len([word for word in words if len(word) > 6]) / len(words) if words else 0
+    passive_voice_count = len(re.findall(r'\b(is|was|were|has been|have been|had been)\b \w+ed', text))
+    return {"avg_word_length": avg_word_length, "complex_words_ratio": complex_words_ratio, "passive_voice_count": passive_voice_count}
+# Semantic Similarity Analysis
+def semantic_analysis(text):
+    keywords = ["AI", "generated", "neural network", "LLM", "GPT", "transformer"]
+    ai_patterns = sum([text.lower().count(keyword) for keyword in keywords])
+    return ai_patterns / len(text.split()) if text.split() else 0
+# Web Crawling for Plagiarism
+def check_plagiarism(text):
+    search_url = f"https://www.google.com/search?q={'+'.join(text.split()[:10])}"
+    headers = {"User-Agent": "Mozilla/5.0"}
+    response = requests.get(search_url, headers=headers)
+    if response.status_code == 200:
+        soup = BeautifulSoup(response.text, "html.parser")
+        links = [a["href"] for a in soup.find_all("a", href=True)]
+        return any("wikipedia" in link or "researchgate" in link or "arxiv" in link for link in links)
+    return False
+# Streamlit UI
+st.title("🚀 Ultra-Advanced AI Content Detector")
+st.write("🔍 Detect whether the text is AI-generated or human-written.")
+text_input = st.text_area("📝 Enter Text Below:", "")
+if st.button("Analyze Text"):
+    ai_probability = detect_ai_content(text_input)
+    writing_features = stylometry_analysis(text_input)
+    semantic_score = semantic_analysis(text_input)
+    is_plagiarized = check_plagiarism(text_input)
+    is_ai_generated = (ai_probability > 0.5 or writing_features["complex_words_ratio"] > 0.4 or semantic_score > 0.2 or is_plagiarized)
+    st.subheader("📊 Analysis Results:")
+    st.write(f"🔹 AI Probability: {ai_probability:.2f}")
+    st.write(f"🔹 Writing Features: {writing_features}")
+    st.write(f"🔹 Semantic Score: {semantic_score:.2f}")
+    st.write(f"🔹 Plagiarism Detected: {'Yes' if is_plagiarized else 'No'}")
+    st.subheader("🧐 Final Verdict:")
+    st.write("✅ **Human-Written**" if not is_ai_generated else "❌ **AI-Generated**")