Spaces:
Running
Running
File size: 2,990 Bytes
a043b44 fb25f09 23df277 517c66b a043b44 fb25f09 23df277 fb25f09 517c66b fb25f09 23df277 fb25f09 23df277 fb25f09 23df277 517c66b fb25f09 517c66b 23df277 fb25f09 23df277 517c66b fb25f09 23df277 517c66b 23df277 517c66b fb25f09 517c66b fb25f09 23df277 517c66b fb25f09 517c66b 23df277 fb25f09 517c66b 23df277 517c66b 23df277 fb25f09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import streamlit as st
from transformers import pipeline, AutoTokenizer
from PyPDF2 import PdfReader
import docx
import plotly.graph_objects as go
# Page configuration
st.set_page_config(layout="wide")
# NOTE(review): title emoji was mojibake ("π") in the original source; restored to 🤖.
st.title("🤖 AI Content Analyzer")
st.markdown("Upload PDF/Word files to detect AI-generated content")

# Constants
MAX_WORDS = 1000          # Maximum number of words passed to the detector
WARNING_THRESHOLD = 1200  # Word count above which a truncation warning is shown
# Load the AI-text detection model once and cache it across Streamlit reruns.
@st.cache_resource
def load_model():
    """Build and return a text-classification pipeline for AI-content detection."""
    checkpoint = "roberta-base-openai-detector"
    return pipeline(
        "text-classification",
        model=checkpoint,
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
    )

detector = load_model()
def count_words(text):
    """Return the number of whitespace-separated tokens in *text*."""
    tokens = text.split()
    return len(tokens)
def create_gauge(score):
    """Render a gauge chart displaying *score* as an AI-content probability (0-100)."""
    # Color bands: green = likely human, yellow = uncertain, red = likely AI.
    bands = [
        {'range': [0, 50], 'color': 'green'},
        {'range': [50, 75], 'color': 'yellow'},
        {'range': [75, 100], 'color': 'red'},
    ]
    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "AI Content Probability", 'font': {'size': 20}},
        gauge={
            'axis': {'range': [None, 100], 'tickwidth': 1},
            'bar': {'color': "darkblue"},
            'steps': bands,
        },
    )
    st.plotly_chart(go.Figure(indicator), use_container_width=True)
# File uploader
uploaded_file = st.file_uploader("Upload file (PDF or Word)", type=["pdf", "docx"])

if uploaded_file:
    # Extract plain text from the uploaded document.
    if uploaded_file.name.endswith(".pdf"):
        reader = PdfReader(uploaded_file)
        # extract_text() may return None for pages without a text layer.
        text = " ".join(page.extract_text() or "" for page in reader.pages)
    else:
        doc = docx.Document(uploaded_file)
        text = " ".join(para.text for para in doc.paragraphs)

    word_count = count_words(text)

    # Warn when the document exceeds the analysis window.
    if word_count > WARNING_THRESHOLD:
        st.warning(f"⚠️ File contains {word_count} words (Analyzing first {MAX_WORDS} words only)")

    if st.button("Analyze Content"):
        if word_count < 50:
            st.error("❌ Insufficient text for analysis (minimum 50 words required)")
        else:
            # Analyze only the first MAX_WORDS words.
            processed_text = " ".join(text.split()[:MAX_WORDS])

            # truncation=True keeps the input within the model's 512-token limit;
            # without it, ~1000 words of input raises a tensor-size error.
            result = detector(processed_text, truncation=True)

            # The detector labels text as fake (AI-written) or real (human-written).
            # Compare case-insensitively: the model card emits "Fake"/"Real",
            # not the all-caps "FAKE" the original code tested for.
            label = result[0]['label'].upper()
            score_pct = result[0]['score'] * 100
            ai_prob = score_pct if label == 'FAKE' else 100 - score_pct

            # Display results
            st.subheader("Analysis Results")
            create_gauge(ai_prob)

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Words Analyzed", f"{min(word_count, MAX_WORDS)}/{word_count}")
            with col2:
                st.metric("AI Probability", f"{ai_prob:.1f}%")

            with st.expander("View Text Sample"):
                # Show at most the first 1000 characters; the ellipsis condition
                # now checks character length (the original compared word_count,
                # a word total, against a character slice).
                st.text(processed_text[:1000] + ("..." if len(processed_text) > 1000 else ""))