Spaces:

Neha13
/

AI_Content_Detector

Sleeping

App Files Files Community

Neha13 committed on Apr 12

Commit

285d2df

verified ·

1 Parent(s): c0e781a

Create app.py

Browse files

Files changed (1) hide show

app.py +97 -0

app.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import streamlit as st
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
+import torch
+import nltk
+from nltk.util import ngrams
+from nltk.probability import FreqDist
+import plotly.express as px
+import torch.nn.functional as F
+from collections import Counter
+from nltk.corpus import stopwords
+import string
+import nltk
+nltk.download('punkt')
+nltk.download('stopwords')
+# Initialize tokenizer and model
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+model = GPT2LMHeadModel.from_pretrained('gpt2')
+def c_perplexity(text):
+    """Calculate the perplexity of the given text using GPT-2."""
+    if not text.strip():
+        return float('inf')  # Return inf for empty input
+    input_ids = tokenizer.encode(text, add_special_tokens=False, return_tensors='pt')
+    if input_ids.size(1) == 0:  # Check for empty input after encoding
+        return float('inf')
+    with torch.no_grad():
+        outputs = model(input_ids)
+        logits = outputs.logits
+    loss = F.cross_entropy(logits.view(-1, logits.size(-1)), input_ids.view(-1))
+    perplexity = torch.exp(loss)
+    return perplexity.item()
+def c_burstiness(text):
+    """Calculate the burstiness of the given text."""
+    tokens = nltk.word_tokenize(text.lower())
+    if not tokens:
+        return 0.0
+    word_freq = FreqDist(tokens)
+    repeated_count = sum(count > 1 for count in word_freq.values())
+    b_score = repeated_count / len(word_freq) if len(word_freq) > 0 else 0.0
+    return b_score
+def top_repword_count(text):
+    """Generate a bar chart of the top 10 most repeated words."""
+    tokens = nltk.word_tokenize(text.lower())
+    stop_words = set(stopwords.words('english'))
+    tokens = [token for token in tokens if token not in stop_words and token not in string.punctuation]
+    word_counts = Counter(tokens)
+    top_words = word_counts.most_common(10)
+    if not top_words:
+        st.write("No significant words found.")
+        return
+    words, counts = zip(*top_words)
+    fig = px.bar(x=words, y=counts, labels={'x': 'Words', 'y': 'Counts'}, title="Top 10 Most Repeated Words in the Text")
+    st.plotly_chart(fig, user_container_width=True)
+# Streamlit app configuration
+st.set_page_config(layout="wide")
+st.title("AI Content Detector")
+text_area = st.text_area("Enter your text here!")
+if text_area:
+    if st.button("Analyse the content"):
+        col1, col2, col3 = st.columns([1, 2, 1])
+        with col1:
+            st.info("Your input text")
+            st.success(text_area)
+        with col2:
+            st.info("Your output score")
+            perplexity = c_perplexity(text_area)
+            burstiness = c_burstiness(text_area)
+            st.success(f"Perplexity score: {perplexity}")
+            st.success(f"Burstiness score: {burstiness}")
+            if perplexity > 40000 or burstiness < 0.24:
+                st.error("Result: The text is likely AI-generated.")
+            else:
+                st.success("Result: The text is not AI-generated.")
+            st.warning("Disclaimer: AI plagiarism detector apps can assist in identifying potential instances of plagiarism.")
+        with col3:
+            st.info("Basic Review")
+            top_repword_count(text_area)