CheckGPT / app.py
Alexcruger's picture
update app commit
0c4aff5
raw
history blame
9.19 kB
##### PREPARATIONS
# libraries
import datetime
import gc
import json
import os
import pickle
import sys
import urllib.request
import requests
import numpy as np
import pandas as pd
import streamlit as st
# Accepted input length, in characters: shorter texts are rejected with a
# "minimal text length" error, longer ones with a "too big input" error.
MIN_TEXT_LEN, MAX_TEXT_LEN = 300, 32_410

# Endpoint the text is POSTed to for classification.
API_URL = "http://checkgpt.app:8880/predict"
# download with progress bar
# Progress-bar widget shared between callbacks; created on first use.
mybar = None


def show_progress(block_num, block_size, total_size):
    """Update a Streamlit progress bar from download-progress numbers.

    The three-argument shape looks like a ``reporthook`` callback for
    ``urllib.request.urlretrieve`` (presumably the intended caller; no call
    site is visible in this file).

    Args:
        block_num: Number of blocks transferred so far.
        block_size: Size of a single block.
        total_size: Total size of the download. Zero or negative values are
            treated as "size unknown".
    """
    global mybar
    if mybar is None:
        mybar = st.progress(0.0)

    if total_size <= 0:
        # Size unknown: no meaningful fraction exists, and dividing would
        # either raise ZeroDivisionError or yield a negative value that the
        # progress bar rejects. Leave the bar where it is.
        return

    fraction = block_num * block_size / total_size
    # The last block usually overshoots the total, so clamp into [0, 1].
    mybar.progress(min(max(fraction, 0.0), 1.0))
##### CONFIG

# page config: collected in one mapping, then applied in a single call
_PAGE_SETTINGS = {
    "page_title": "CheckGPT - ChatGPT and other big LM detection engine",
    "page_icon": ":books:",
    "layout": "centered",
    "initial_sidebar_state": "collapsed",
    "menu_items": None,
}
st.set_page_config(**_PAGE_SETTINGS)
##### HEADER

# title
st.title('CheckGPT - AI-written text detect')

# description: every entry goes to its own st.write call, in this order
_INTRO_PARAGRAPHS = (
    'CheckGPT is a neural network to check if text is generated by '
    'big AI LMs (like ChatGPT, GPT3, GPT2, BLOOM, You.com AI and etc).',
    "Currently supported languages are: ['en'].",
    "Use our web app and RestAPI at https://checkgpt.app.\n",
    'Already telegram bot is available at: https://t.me/chatgpt_bot.',
    'To connect with authors please write to: https://t.me/uberwow | https://CheckGPT.app',
)
for _paragraph in _INTRO_PARAGRAPHS:
    st.write(_paragraph)
##### PARAMETERS

# title
st.header('Check for AI generated text?')

# input text: free-form box that starts out empty
input_text = st.text_area(
    label='Which text would you like to check?',
    value='',
)
##### MODELING

# Seconds to wait for the prediction API before giving up, so that an
# unreachable backend cannot hang the page forever.
REQUEST_TIMEOUT_SEC = 30
# Directory that receives one file per successfully checked text.
LOG_DIR = 'logs'


def _request_prediction(text):
    """POST *text* to the prediction API and return the decoded JSON reply.

    Raises:
        requests.RequestException: on connection failure, timeout or a
            non-success HTTP status.
        ValueError: if the response body is not valid JSON.
    """
    payload = json.dumps({"text": text})
    response = requests.post(
        API_URL,
        data=payload,
        headers={'Content-Type': 'application/json'},
        timeout=REQUEST_TIMEOUT_SEC,
    )
    response.raise_for_status()
    return json.loads(response.text)


def _save_log(text, result):
    """Store *text* in a timestamped file whose name includes *result*."""
    # The directory may not exist on a fresh deployment.
    os.makedirs(LOG_DIR, exist_ok=True)
    timestamp = '{:%Y-%m-%d_%H-%M-%S}'.format(datetime.datetime.now())
    log_path = os.path.join(LOG_DIR, f'text_{result}_{timestamp}.log')
    with open(log_path, 'w', encoding="utf-8") as file:
        file.write(text)


def _run_check(text):
    """Validate *text*, query the API, render the verdict and log the text.

    Every failure is reported through ``st.error`` and ends the check early;
    nothing is rendered from a missing or malformed API answer.
    """
    if len(text) < MIN_TEXT_LEN:
        st.error(f'😐 Minimal text length: {MIN_TEXT_LEN} characters. Your text is {len(text)} characters.\n'
                 f'The longer the text, the higher the accuracy.\n')
        return
    if len(text) > MAX_TEXT_LEN:
        st.error('⚠ Seems we got too big input! Please try again with smaller text!')
        return

    # try to send request to api
    try:
        answer = _request_prediction(text)
    except (requests.RequestException, ValueError) as err:
        st.error('Some backend RESTAPI error. Please try later!')
        print("Some error while sending POST request to RESTAPI. "
              f"Please check your connection and try again! ({err!r})")
        return

    # extract data from json answer
    try:
        result = int(answer["result"])
        human_score = round(float(answer["human_score"]), 4)
        chatgpt_score = round(float(answer["ChatGPT_score"]), 4)
        req_id = answer["request_id"]
        execution_time = answer["execution_time"]
    except (KeyError, TypeError, ValueError) as err:
        st.error('Some backend RESTAPI error. Please try later!')
        print(f"Unexpected answer from RESTAPI: {answer!r} ({err!r})")
        return

    # print output
    st.metric(label="Prediction", value=result)
    st.write(f"✅ Total score (AI signs):\n\n Human-like score: {human_score}%\n ChatGPT-like score: {chatgpt_score}%\n")
    st.write(f"⚙ Information:\n\n Execution time: {execution_time} sec.\n Request ID: {req_id}\n Prediction: ", result)
    if result == 0:
        st.success(f"👨 Seems like text was written by the human ({human_score}%)!")
    elif result == 1:
        st.success(f"🤖 Seems to be a ChatGPT generated text! ({chatgpt_score}%)")

    # save log; the verdict is already on screen, so a failed write is only
    # reported on the console instead of breaking the page
    try:
        _save_log(text, result)
    except OSError as err:
        print(f"Could not write log file: {err!r}")


# run the check when the button is pressed
if st.button('Check'):
    # everything below renders while the spinner is shown
    with st.spinner('Computing prediction...'):
        _run_check(input_text)
##### DOCUMENTATION

# header
st.header('More information')

# example texts: three samples, each paired below with the label shown for it
_EXAMPLE_AI_INDUSTRIES = """AI is already transforming a number of industries, including healthcare, finance, transportation, and retail, among others. Here are a few ways AI is revolutionizing these fields:
Healthcare: AI is helping medical professionals diagnose diseases and develop new treatments by analyzing vast amounts of medical data. Additionally, AI-powered systems are being developed to assist in surgeries and other medical procedures, leading to improved patient outcomes.
Finance: AI is helping financial institutions better understand and manage risk, detect fraud, and improve customer service. Machine learning algorithms are used to analyze financial data and make predictions about market trends, helping to inform investment decisions.
Transportation: AI is playing a major role in the development of autonomous vehicles, which have the potential to revolutionize the way we travel. Additionally, AI-powered logistics systems are helping companies optimize their supply chains, reducing costs and improving delivery times.
Retail: AI is being used to improve the shopping experience for customers by personalizing product recommendations, optimizing pricing, and streamlining the checkout process. Additionally, AI-powered systems are helping retailers better understand customer behavior, leading to improved inventory management and reduced waste.
These are just a few examples of how AI is revolutionizing different industries. The potential applications of AI are nearly limitless, and it will continue to play a major role in shaping our future."""

_EXAMPLE_GPT3_USES = """The most profitable use of GPT-3 would depend on the specific application and industry in which it is used. Some examples of profitable uses for GPT-3 include:
Content creation: GPT-3 can generate high-quality, unique content quickly and efficiently, which can be used for blog posts, articles, and other types of written content.
Chatbots: GPT-3 can be used to build chatbots that can understand and respond to natural language input, which can be used in customer service and other applications where human-like conversation is important.
Language Translation: GPT-3 can be used to create accurate and efficient language translation models, which can be used in a variety of industries, such as e-commerce, travel, and more.
Summarization: GPT-3 can be used to summarize large bodies of text, such as news articles or legal documents, making them more manageable for human readers.
Research and analytics: GPT-3 can be used to extract insights and information from large amounts of unstructured data, such as social media posts or customer feedback.
Ultimately, the most profitable use of GPT-3 will depend on the specific needs and goals of the organization using it."""

_EXAMPLE_BAYESIAN_TUNING = """Summary: Bayesian optimization tools are a compelling option once we’re done exploring for good search spaces and have decided what hyperparameters even should be tuned at all.
At some point, our priorities will shift from learning more about the tuning problem to producing a single best configuration to launch or otherwise use.
At this point, there should be a refined search space that comfortably contains the local region around the best observed trial and has been adequately sampled.
Our exploration work should have revealed the most essential hyperparameters to tune (as well as sensible ranges for them) that we can use to construct a search space for a final automated tuning study using as large a tuning budget as possible.
Since we no longer care about maximizing our insight into the tuning problem, many of the advantages of quasi-random search no longer apply and Bayesian optimization tools should be used to automatically find the best hyperparameter configuration.
If the search space contains a non-trivial volume of divergent points (points that get NaN training loss or even training loss many standard deviations worse than the mean), it is important to use black box optimization tools that properly handle trials that diverge (see Bayesian Optimization with Unknown Constraints for an excellent way to deal with this issue).
At this point, we should also consider checking the performance on the test set.
In principle, we could even fold the validation set into the training set and retraining the best configuration found with Bayesian optimization. However, this is only appropriate if there won't be future launches with this specific workload (e.g. a one-time Kaggle competition)."""

_examples_frame = pd.DataFrame({
    'Text': [
        _EXAMPLE_AI_INDUSTRIES,
        _EXAMPLE_GPT3_USES,
        _EXAMPLE_BAYESIAN_TUNING,
    ],
    'Predictions': ['ChatGPT', 'ChatGPT', 'Human'],
})

with st.expander('Show example texts'):
    st.table(_examples_frame)
# models: short description of the detection approach, shown on demand
_MODELS_BLURB = (
    'For ChatGPT generated content detection, we are using statistical and '
    'heuristical methods, perplexity, entropy, '
    'coherence and consistency of the text and some our personal know-how ;).'
)
with st.expander('Read about the models'):
    st.write(_MODELS_BLURB)

# metric
##### CONTACT

# header
st.header("Contact")

# website link

# copyright: the sign was mis-encoded in the string ("Β©"); it is written as
# an escape here so that it survives a future re-encoding of this file
st.text("All rights reserved \u00a9 2023 | https://t.me/uberwow | https://CheckGPT.app")