import datetime
import gc
import json
import os
import pickle
import sys
import urllib.request

import numpy as np
import pandas as pd
import requests
import streamlit as st


# Bounds on the length (in characters) of the text accepted for checking.
MIN_TEXT_LEN = 300
MAX_TEXT_LEN = 32410

# Prediction endpoint; the text is POSTed to it as a JSON document.
API_URL = "http://checkgpt.app:8880/predict"

# Streamlit progress bar shared by show_progress(); created on first use.
mybar = None


def show_progress(block_num, block_size, total_size):
    """Advance the shared Streamlit progress bar to the downloaded fraction.

    The three-argument signature matches the ``reporthook`` callback of
    ``urllib.request.urlretrieve``; no call site is visible in this file, so
    that is presumably the intended caller (TODO confirm).

    Args:
        block_num: Number of blocks transferred so far.
        block_size: Size of a single block.
        total_size: Total size of the transfer. A value <= 0 is treated as
            "unknown" and leaves the bar untouched.
    """
    global mybar
    if mybar is None:
        # Create the bar lazily so nothing is rendered until a transfer starts.
        mybar = st.progress(0.0)

    if total_size <= 0:
        # No meaningful fraction can be computed: a zero total would raise
        # ZeroDivisionError and a negative one would yield a negative value
        # that the progress bar cannot display.
        return

    fraction = block_num * block_size / total_size
    # The last block usually overshoots the total, so clamp into [0.0, 1.0].
    mybar.progress(min(max(fraction, 0.0), 1.0))


# Browser-tab title/icon and overall page layout for the app.
st.set_page_config(
    page_title="CheckGPT - ChatGPT and other big LM detection engine",
    page_icon=":books:",
    layout="centered",
    initial_sidebar_state="collapsed",
    menu_items=None,
)


# Landing section: app title followed by a short description and contact links.
st.title('CheckGPT - AI-written text detect')

st.write(
    'CheckGPT is a neural network to check if text is generated by '
    'big AI LMs (like ChatGPT, GPT3, GPT2, BLOOM, You.com AI and etc).'
)
st.write("Currently supported languages are: ['en'].")
st.write("Use our web app and RestAPI at https://checkgpt.app.\n")
st.write('Already telegram bot is available at: https://t.me/chatgpt_bot.')
st.write('To connect with authors please write to: https://t.me/uberwow | https://CheckGPT.app')


# Check section: read the user's text, validate its length, send it to the
# prediction API and render the verdict.
st.header('Check for AI generated text?')

input_text = st.text_area('Which text would you like to check?', '')

# Upper bound (seconds) on the API round trip so that a stalled backend cannot
# keep the page spinning forever.
REQUEST_TIMEOUT_SEC = 60

if st.button('Check'):
    with st.spinner('Computing prediction...'):
        text_len = len(input_text)

        if text_len < MIN_TEXT_LEN:
            # NOTE(review): the leading glyph looks like a mis-decoded emoji;
            # restore it from the original source if available.
            st.error(
                f'π Minimal text length: {MIN_TEXT_LEN} characters. Your text is {text_len} characters.\n'
                'The longer the text, the higher the accuracy.\n'
            )
        elif text_len > MAX_TEXT_LEN:
            # NOTE(review): leading glyph looks like a mis-decoded emoji - confirm.
            st.error('β Seems we got too big input! Please try again with smaller text!')
        else:
            try:
                response = requests.post(
                    API_URL,
                    data=json.dumps({"text": input_text}),
                    headers={'Content-Type': 'application/json'},
                    timeout=REQUEST_TIMEOUT_SEC,
                )
                # Treat HTTP error statuses as backend failures instead of
                # trying to interpret an error page as a prediction.
                response.raise_for_status()
                answer = json.loads(response.text)

                # Extract every field up front so a malformed reply is reported
                # as a backend error rather than crashing half-way through
                # rendering.
                result = int(answer["result"])
                human_score = round(float(answer["human_score"]), 4)
                chatgpt_score = round(float(answer["ChatGPT_score"]), 4)
                req_id = answer["request_id"]
                execution_time = answer["execution_time"]
            except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
                # ValueError covers invalid JSON and non-numeric fields;
                # KeyError/TypeError cover a reply with an unexpected shape.
                st.error('Some backend RESTAPI error. Please try later!')
                print(
                    "Some error while sending POST request to RESTAPI. "
                    "Please check your connection and try again!",
                    repr(exc),
                )
            else:
                # Only reached when the request succeeded and the reply parsed;
                # previously this code also ran after a failure and died with
                # a NameError on the undefined `answer`.
                st.metric(label="Prediction", value=result)

                st.write(
                    f"✅ Total score (AI signs):\n\n Human-like score: {human_score}%\n"
                    f" ChatGPT-like score: {chatgpt_score}%\n"
                )
                # NOTE(review): leading glyph looks like a mis-decoded emoji - confirm.
                st.write(
                    f"β Information:\n\n Execution time: {execution_time} sec.\n"
                    f" Request ID: {req_id}\n Prediction: ",
                    result,
                )

                if result == 0:
                    st.success(f"👨 Seems like text was written by the human ({human_score}%)!")
                elif result == 1:
                    st.success(f"🤖 Seems to be a ChatGPT generated text! ({chatgpt_score}%)")

                # Keep a copy of the checked text, named by verdict and
                # timestamp. The directory is created on demand so a fresh
                # deployment does not fail with FileNotFoundError.
                os.makedirs('logs', exist_ok=True)
                log_path = f"logs/text_{result}_{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}.log"
                with open(log_path, 'w', encoding="utf-8") as file:
                    file.write(input_text)


# "More information" section: a table of sample texts with the label shown
# for each of them.
st.header('More information')

# Sample texts, written paragraph by paragraph; "\n\n" marks a blank line
# between paragraphs and "\n" a plain line break.
_EXAMPLE_AI_INDUSTRIES = (
    "AI is already transforming a number of industries, including healthcare, finance, transportation, and retail, among others. Here are a few ways AI is revolutionizing these fields:\n\n"
    "Healthcare: AI is helping medical professionals diagnose diseases and develop new treatments by analyzing vast amounts of medical data. Additionally, AI-powered systems are being developed to assist in surgeries and other medical procedures, leading to improved patient outcomes.\n\n"
    "Finance: AI is helping financial institutions better understand and manage risk, detect fraud, and improve customer service. Machine learning algorithms are used to analyze financial data and make predictions about market trends, helping to inform investment decisions.\n\n"
    "Transportation: AI is playing a major role in the development of autonomous vehicles, which have the potential to revolutionize the way we travel. Additionally, AI-powered logistics systems are helping companies optimize their supply chains, reducing costs and improving delivery times.\n\n"
    "Retail: AI is being used to improve the shopping experience for customers by personalizing product recommendations, optimizing pricing, and streamlining the checkout process. Additionally, AI-powered systems are helping retailers better understand customer behavior, leading to improved inventory management and reduced waste.\n\n"
    "These are just a few examples of how AI is revolutionizing different industries. The potential applications of AI are nearly limitless, and it will continue to play a major role in shaping our future."
)

_EXAMPLE_GPT3_USES = (
    "The most profitable use of GPT-3 would depend on the specific application and industry in which it is used. Some examples of profitable uses for GPT-3 include:\n\n"
    "Content creation: GPT-3 can generate high-quality, unique content quickly and efficiently, which can be used for blog posts, articles, and other types of written content.\n\n"
    "Chatbots: GPT-3 can be used to build chatbots that can understand and respond to natural language input, which can be used in customer service and other applications where human-like conversation is important.\n\n"
    "Language Translation: GPT-3 can be used to create accurate and efficient language translation models, which can be used in a variety of industries, such as e-commerce, travel, and more.\n\n"
    "Summarization: GPT-3 can be used to summarize large bodies of text, such as news articles or legal documents, making them more manageable for human readers.\n\n"
    "Research and analytics: GPT-3 can be used to extract insights and information from large amounts of unstructured data, such as social media posts or customer feedback.\n\n"
    "Ultimately, the most profitable use of GPT-3 will depend on the specific needs and goals of the organization using it."
)

_EXAMPLE_BAYESIAN_TUNING = (
    "Summary: Bayesian optimization tools are a compelling option once we’re done exploring for good search spaces and have decided what hyperparameters even should be tuned at all.\n\n"
    "At some point, our priorities will shift from learning more about the tuning problem to producing a single best configuration to launch or otherwise use.\n"
    "At this point, there should be a refined search space that comfortably contains the local region around the best observed trial and has been adequately sampled.\n"
    "Our exploration work should have revealed the most essential hyperparameters to tune (as well as sensible ranges for them) that we can use to construct a search space for a final automated tuning study using as large a tuning budget as possible.\n"
    "Since we no longer care about maximizing our insight into the tuning problem, many of the advantages of quasi-random search no longer apply and Bayesian optimization tools should be used to automatically find the best hyperparameter configuration.\n"
    "If the search space contains a non-trivial volume of divergent points (points that get NaN training loss or even training loss many standard deviations worse than the mean), it is important to use black box optimization tools that properly handle trials that diverge (see Bayesian Optimization with Unknown Constraints for an excellent way to deal with this issue).\n"
    "At this point, we should also consider checking the performance on the test set.\n"
    "In principle, we could even fold the validation set into the training set and retraining the best configuration found with Bayesian optimization. However, this is only appropriate if there won't be future launches with this specific workload (e.g. a one-time Kaggle competition)."
)

with st.expander('Show example texts'):
    st.table(pd.DataFrame({
        'Text': [
            _EXAMPLE_AI_INDUSTRIES,
            _EXAMPLE_GPT3_USES,
            _EXAMPLE_BAYESIAN_TUNING,
        ],
        # One label per text above, in the same order.
        'Predictions': ['ChatGPT', 'ChatGPT', 'Human'],
    }))


# Collapsible note describing, at a high level, how detection is done.
with st.expander('Read about the models'):
    st.write(
        'For ChatGPT generated content detection, we are using statistical and heuristical methods, perplexity, '
        'entropy, '
        'coherence and consistency of the text and some our personal know-how ;).'
    )


# Footer with copyright notice and contact links.
st.header("Contact")

st.text("All rights reserved © 2023 | https://t.me/uberwow | https://CheckGPT.app")
|