SabkeSawaal68 committed (verified)
Commit bc5dbea · 1 Parent(s): 357c48d

Update app.py

Files changed (1)
  1. app.py +48 -52
app.py CHANGED
@@ -4,14 +4,23 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import requests
 import re
 import numpy as np
-from bs4 import BeautifulSoup
-from datasets import load_dataset
+from fastapi import FastAPI
+from pydantic import BaseModel
+import uvicorn
+import threading
 
 # Load AI detection model
 MODEL_NAME = "roberta-base-openai-detector"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
 
+# FastAPI App
+app = FastAPI()
+
+# API Input Model
+class TextInput(BaseModel):
+    text: str
+
 # AI content detection function
 def detect_ai_content(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
@@ -37,64 +46,51 @@ def semantic_analysis(text):
     ai_patterns = sum([text.lower().count(keyword) for keyword in keywords])
     return ai_patterns / len(text.split()) if text.split() else 0
 
-# Web crawling for plagiarism detection
-def check_plagiarism(text):
-    search_url = f"https://www.google.com/search?q={'+'.join(text.split()[:10])}"
-    headers = {"User-Agent": "Mozilla/5.0"}
-    response = requests.get(search_url, headers=headers)
-    if response.status_code == 200:
-        soup = BeautifulSoup(response.text, "html.parser")
-        links = [a["href"] for a in soup.find_all("a", href=True)]
-        return any("wikipedia" in link or "researchgate" in link or "arxiv" in link for link in links)
-    return False
-
-# Self-learning system (Internet AI Content Collection)
-def collect_ai_generated_text():
-    dataset = load_dataset("openai/webgpt_comparisons", split="train")
-    ai_text_samples = [item["answer_1"] for item in dataset.select(range(50))]
-    return ai_text_samples
-
-# Auto-learn from new AI models
-def update_model_with_new_patterns():
-    new_ai_texts = collect_ai_generated_text()
-    for text in new_ai_texts:
-        ai_score = detect_ai_content(text)
-        if ai_score < 0.5:
-            # Fine-tune model logic (this will need cloud-based model retraining)
-            pass
+# FastAPI Route for AI Detection
+@app.post("/api/detect")
+def api_detect(data: TextInput):
+    ai_probability = detect_ai_content(data.text)
+    writing_features = stylometry_analysis(data.text)
+    semantic_score = semantic_analysis(data.text)
+
+    is_ai_generated = (
+        ai_probability > 0.5 or
+        writing_features["complex_words_ratio"] > 0.4 or
+        semantic_score > 0.2
+    )
+
+    result = "AI-Generated" if is_ai_generated else "Human-Written"
+
+    return {
+        "AI Probability": round(ai_probability, 2),
+        "Complex Words Ratio": round(writing_features["complex_words_ratio"], 2),
+        "Passive Voice Count": writing_features["passive_voice_count"],
+        "Semantic Score": round(semantic_score, 2),
+        "Final Verdict": result
+    }
 
 # Streamlit UI
 st.title("Self-Learning AI Content Detector")
-st.write("Detect AI-generated text and continuously learn from new AI models.")
+st.write("Detect AI-generated text and analyze writing patterns.")
 
 text_input = st.text_area("Enter text to analyze:")
 
 if st.button("Analyze"):
     if text_input.strip():
-        ai_probability = detect_ai_content(text_input)
-        writing_features = stylometry_analysis(text_input)
-        semantic_score = semantic_analysis(text_input)
-        is_plagiarized = check_plagiarism(text_input)
-
-        # Multi-layer AI detection logic
-        is_ai_generated = (
-            ai_probability > 0.5 or
-            writing_features["complex_words_ratio"] > 0.4 or
-            semantic_score > 0.2 or
-            is_plagiarized
-        )
-
-        result = "AI-Generated" if is_ai_generated else "Human-Written"
+        response = requests.post("http://127.0.0.1:8000/api/detect", json={"text": text_input})
+        result = response.json()
 
         st.subheader("Detection Results")
-        st.write(f"**AI Probability:** {ai_probability:.2f}")
-        st.write(f"**Complex Words Ratio:** {writing_features['complex_words_ratio']:.2f}")
-        st.write(f"**Passive Voice Count:** {writing_features['passive_voice_count']}")
-        st.write(f"**Semantic Score:** {semantic_score:.2f}")
-        st.write(f"**Plagiarism Detected:** {'Yes' if is_plagiarized else 'No'}")
-        st.subheader(f"Final Verdict: {result}")
-
-        # Auto-learn from new AI patterns
-        update_model_with_new_patterns()
     else:
-        st.error("Please enter some text for analysis.")
+        st.error("Please enter some text for analysis.")
+
+# Running FastAPI in Background
+def run_fastapi():
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+
+threading.Thread(target=run_fastapi, daemon=True).start()
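With this revision the Streamlit UI no longer calls the detectors in-process; it round-trips through the new FastAPI route instead. For reference, a minimal smoke test against that route, assuming the updated app.py is running and the background uvicorn server is reachable on localhost:8000 (the sample text and the printed response are illustrative only):

    import requests

    # Hypothetical smoke test for the /api/detect route added in this commit.
    # Assumes app.py has already started the background uvicorn server on port 8000.
    payload = {"text": "Furthermore, it is important to note that this analysis delves into key aspects."}
    resp = requests.post("http://127.0.0.1:8000/api/detect", json=payload, timeout=30)
    resp.raise_for_status()

    print(resp.json())
    # Illustrative shape of the response:
    # {"AI Probability": 0.91, "Complex Words Ratio": 0.27,
    #  "Passive Voice Count": 0, "Semantic Score": 0.12,
    #  "Final Verdict": "AI-Generated"}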
 
 
 
 
 
 
 
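One caveat on the startup block at the bottom of the new file: Streamlit re-executes app.py from top to bottom on every user interaction, so the module-level threading.Thread(target=run_fastapi, daemon=True).start() fires again on each rerun, and the repeated uvicorn.run calls will hit an address-already-in-use error once port 8000 is bound. A sketch of one way to guard against this, assuming a Streamlit version that provides st.cache_resource (the start_fastapi_once helper is illustrative, not part of this commit):

    import threading
    import uvicorn
    import streamlit as st

    @st.cache_resource  # cached once per server process, not once per rerun
    def start_fastapi_once():
        # `app` is the FastAPI() instance defined earlier in app.py.
        thread = threading.Thread(
            target=lambda: uvicorn.run(app, host="0.0.0.0", port=8000),
            daemon=True,
        )
        thread.start()
        return thread

    start_fastapi_once()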