SabkeSawaal68 committed
Commit 10ae628 · verified · 1 Parent(s): cb58b29

Create app.py

Files changed (1): app.py +65 -0
app.py ADDED
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import requests
import re
from bs4 import BeautifulSoup

# Hugging Face AI model (the GPT-2 output detector, a fine-tuned RoBERTa)
MODEL_NAME = "roberta-base-openai-detector"

@st.cache_resource  # cache so the model downloads/loads once, not on every Streamlit rerun
def load_detector():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    return tokenizer, model

tokenizer, model = load_detector()

# AI content detection: returns the model's probability for the assumed AI class
def detect_ai_content(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    scores = torch.nn.functional.softmax(outputs.logits, dim=1)
    # NOTE: index 1 is assumed to be the machine-generated class here;
    # verify against model.config.id2label before trusting the score.
    return scores[0][1].item()

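# (Illustrative check, not part of the original commit: print the checkpoint's
# label mapping once to confirm which softmax index means "AI-generated".)
# >>> from transformers import AutoConfig
# >>> AutoConfig.from_pretrained(MODEL_NAME).id2label
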
# Writing-style (stylometry) analysis: rough statistical fingerprints of the prose
def stylometry_analysis(text):
    words = text.split()
    avg_word_length = sum(len(word) for word in words) / len(words) if words else 0
    complex_words_ratio = len([word for word in words if len(word) > 6]) / len(words) if words else 0
    # Crude passive-voice heuristic: a form of "to be" followed by an -ed word
    passive_voice_count = len(re.findall(r'\b(?:is|was|were|has been|have been|had been)\b \w+ed', text))
    return {
        "avg_word_length": avg_word_length,
        "complex_words_ratio": complex_words_ratio,
        "passive_voice_count": passive_voice_count,
    }

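# Worked example (illustrative, not in the original commit):
# stylometry_analysis("The model was trained quickly.") returns
# {'avg_word_length': 5.2, 'complex_words_ratio': 0.4, 'passive_voice_count': 1}
# ("was trained" matches the passive pattern; "trained" and "quickly." count as
# complex words because len() > 6 includes the trailing period).
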
# Keyword-density check: share of words matching common AI-related terms
def semantic_analysis(text):
    keywords = ["AI", "generated", "neural network", "LLM", "GPT", "transformer"]
    # Lowercase both sides; the original compared lowercased text against
    # mixed-case keywords, so "AI", "LLM", and "GPT" could never match.
    ai_patterns = sum(text.lower().count(keyword.lower()) for keyword in keywords)
    return ai_patterns / len(text.split()) if text.split() else 0

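# Worked example (illustrative): semantic_analysis("GPT is a transformer model")
# counts lowercase substring hits ("gpt" and "transformer" -> 2) over 5 words = 0.4.
# Substring matching is coarse: "ai" would also match inside a word like "said".
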
# Web search for overlapping sources (fragile: Google may block or redirect scraped queries)
def check_plagiarism(text):
    search_url = f"https://www.google.com/search?q={'+'.join(text.split()[:10])}"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(search_url, headers=headers, timeout=10)
    except requests.RequestException:
        return False  # network failure: report no match rather than crash the app
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        links = [a["href"] for a in soup.find_all("a", href=True)]
        return any("wikipedia" in link or "researchgate" in link or "arxiv" in link for link in links)
    return False

# Streamlit UI
st.title("🚀 Ultra-Advanced AI Content Detector")
st.write("🔍 Detect whether the text is AI-generated or human-written.")

text_input = st.text_area("📝 Enter Text Below:", "")

if st.button("Analyze Text"):
    if not text_input.strip():
        st.warning("Please enter some text to analyze.")
    else:
        ai_probability = detect_ai_content(text_input)
        writing_features = stylometry_analysis(text_input)
        semantic_score = semantic_analysis(text_input)
        is_plagiarized = check_plagiarism(text_input)

        # Flag as AI-generated if any single heuristic crosses its threshold
        is_ai_generated = (
            ai_probability > 0.5
            or writing_features["complex_words_ratio"] > 0.4
            or semantic_score > 0.2
            or is_plagiarized
        )

        st.subheader("📊 Analysis Results:")
        st.write(f"🔹 AI Probability: {ai_probability:.2f}")
        st.write(f"🔹 Writing Features: {writing_features}")
        st.write(f"🔹 Semantic Score: {semantic_score:.2f}")
        st.write(f"🔹 Plagiarism Detected: {'Yes' if is_plagiarized else 'No'}")
        st.subheader("🧐 Final Verdict:")
        st.write("✅ **Human-Written**" if not is_ai_generated else "❌ **AI-Generated**")
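
To exercise the detector outside the Streamlit UI, a minimal standalone sketch (assuming the roberta-base-openai-detector checkpoint still resolves on the Hugging Face Hub; the sample sentence is illustrative):

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

text = "The transformer architecture has revolutionized natural language processing."
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
with torch.no_grad():
    probs = torch.softmax(model(**inputs).logits, dim=1)[0]

# Report scores by label name instead of hard-coding an index.
for idx, label in model.config.id2label.items():
    print(f"{label}: {probs[idx]:.3f}")

Running the app itself requires streamlit, torch, transformers, requests, and beautifulsoup4 installed, then: streamlit run app.py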