Spaces:

idkash1
/

detect-edits-in-ai-generated-text

Sleeping

File size: 3,504 Bytes

#https://www.freecodecamp.org/news/how-to-setup-virtual-environments-in-python/
#https://www.youtube.com/watch?v=qbLc5a9jdXo&ab_channel=CalebCurry
#https://stackoverflow.com/questions/26368306/export-is-not-recognized-as-an-internal-or-external-command
#python3 -m venv .venv
#source .venv/bin/activate
#
#pip freeze > requirements.txt
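# Set the Flask entry point (use the line that matches your shell):
#   PowerShell: $env:FLASK_APP="application.py"
#   cmd.exe:    set FLASK_APP=application.py
#   bash/zsh:   export FLASK_APP=application.py
# Choose the Flask environment:
#   cmd.exe:    set FLASK_ENV=development
#   bash/zsh:   export FLASK_ENV=production
# Start the server:
#   flask run                  (default host)
#   flask run --host=0.0.0.0   (listen on all interfaces)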

#pip install torchvision

from flask import Flask, request, jsonify
from flask_cors import CORS
import pandas
import threading
import uuid
import time
from human_text_detect import detect_human_text

# Flask application object; the route decorators in this file register
# their handlers on it.
app = Flask(__name__)
# Wrap the app with flask_cors using the library's default settings.
CORS(app)

# In-memory store of analysis outcomes keyed by task id. Entries are written
# by process_analysis and removed by get_results when a client fetches them.
task_results = {}

@app.route('/')
def index():
    """Return the fixed plain-text greeting served at the root URL."""
    greeting = 'Hello'
    return greeting

def process_analysis(task_id, text, model_name, topic):
    """Run the edit-detection analysis for one task and record its outcome.

    Started in a background thread by ``check_text``. Whatever happens, an
    entry is written to ``task_results[task_id]`` so that a polling client
    never waits forever:

    * ``{'status': 'error', 'error': <message>}`` when validation fails or
      the analysis raises an exception;
    * ``{'status': 'completed', 'message': ..., 'hcRelativeToThreshold': ...,
      'sentences': [...]}`` on success, where each sentence entry carries
      ``sentence``, ``lppt``, ``pvalue`` and a highlight ``color`` (set when
      ``pvalue < 0.05``, otherwise empty).

    Args:
        task_id: Key under which the outcome is stored in ``task_results``.
        text: Text to analyse.
        model_name: Model identifier, checked by ``validate_data``.
        topic: Public topic name, translated through ``check_topic``.
    """
    print(f"Processing task: {task_id}")

    try:
        # Validate data
        print('Validate data')
        answer = validate_data(text, model_name, topic)
        if answer != '':
            task_results[task_id] = {'status': 'error', 'error': answer}
            return

        topic = check_topic(topic)
        hcRelativeToThreshold, df_sentences = detect_human_text(model_name, topic, text)
        message = (
            'Edits found in the text'
            if hcRelativeToThreshold >= 0
            else "We couldn't find edits in the text"
        )

        # NOTE(review): values are taken straight from the DataFrame returned
        # by detect_human_text; confirm they are JSON-serialisable by jsonify.
        sentences = [
            {
                "sentence": row["sentence"],
                "lppt": row["response"],
                "pvalue": row["pvalue"],
                "color": "#f5aca4" if row["pvalue"] < 0.05 else "",
            }
            for _, row in df_sentences.iterrows()
        ]

        result = {
            'status': 'completed',
            'message': message,
            'hcRelativeToThreshold': hcRelativeToThreshold,
            'sentences': sentences,
        }
    except Exception as exc:
        # This is the top of a worker thread: an uncaught exception would
        # leave no entry for the task and the client would poll "pending"
        # indefinitely. Log the details server-side and report a generic
        # failure to the client.
        print(f"Task {task_id} failed: {exc!r}")
        result = {'status': 'error', 'error': 'Internal error while analyzing the text'}

    # Store the result
    task_results[task_id] = result

@app.route('/detectHumanInAIText/checkText', methods=['POST'])
def check_text():
    """Accept an analysis request and start it in the background.

    Expects a JSON object with the keys ``text``, ``model`` and ``topic``.
    The field values are validated later, inside ``process_analysis``; this
    handler only checks that the body is a JSON object.

    Returns:
        ``({'taskId': <uuid4 string>}, 202)`` once the worker thread has been
        started, or ``({'error': ...}, 400)`` when the body is not a JSON
        object.
    """
    # Get data
    print('Get data')
    data = request.get_json()
    # get_json() can yield None or a non-object JSON value (list, string,
    # number, null); calling .get on those would raise and produce a 500.
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('text')
    model_name = data.get('model')
    topic = data.get('topic')

    # Generate a unique taskId
    task_id = str(uuid.uuid4())

    # Start processing in a separate thread
    thread = threading.Thread(target=process_analysis, args=(task_id, text, model_name, topic))
    thread.start()

    # Return taskId immediately
    return jsonify({'taskId': task_id}), 202

@app.route('/detectHumanInAIText/getAnalyzeResults', methods=['GET'])
def get_results():
    """Return the stored outcome for a task and remove it from the store.

    Reads the ``taskId`` query parameter.

    Returns:
        * ``({'error': 'Missing taskId parameter'}, 400)`` when ``taskId`` is
          absent or empty;
        * ``({'status': 'pending'}, 202)`` when no outcome is stored under
          that id (this includes ids that were never issued or were already
          fetched);
        * ``(<stored outcome>, 200)`` otherwise. The outcome is handed out
          once: it is removed from ``task_results`` as it is returned.
    """
    task_id = request.args.get('taskId')

    if not task_id:
        return jsonify({'error': 'Missing taskId parameter'}), 400

    # Use a single pop with a default instead of "check membership, then
    # pop": two concurrent polls for the same id could otherwise both pass
    # the check and the slower one would raise KeyError. Stored outcomes are
    # always dicts, so None is a safe "not present" marker.
    result = task_results.pop(task_id, None)
    if result is None:
        return jsonify({'status': 'pending'}), 202

    return jsonify(result), 200

def validate_data(text, model_name, topic):
    """Check the request fields and describe the first problem found.

    The values come from a client-supplied JSON body, so they may be of any
    JSON type, not only strings.

    Args:
        text: Text to analyse; must be a non-empty string.
        model_name: Model identifier; must be ``'GPT2XL'`` or ``'PHI2'``.
        topic: Public topic name; must be one that ``check_topic`` knows.

    Returns:
        An empty string when every field is acceptable, otherwise a
        human-readable error message.
    """
    if text is None or text == '':
        return 'Text is missing'

    # A JSON number, list or object is not analysable text.
    if not isinstance(text, str):
        return 'Text must be a string'

    if model_name is None or model_name == '':
        return 'Model name is missing'

    if topic is None or topic == '':
        return 'Topic is missing'

    if model_name not in ('GPT2XL', 'PHI2'):
        return f'Model {model_name} not supported'

    # Guard the lookup: an unhashable topic (JSON list/object) would make the
    # dictionary lookup inside check_topic raise TypeError.
    if not isinstance(topic, str) or check_topic(topic) is None:
        return f'Topic {topic} not supported'

    return ''

def check_topic(topic):
    """Translate a public topic name into its internal identifier.

    Args:
        topic: Topic name as sent by the client (e.g. ``'figures'``).

    Returns:
        The matching internal identifier (e.g. ``'characters'``), or ``None``
        when the topic is not one of the known names.
    """
    internal_names = dict(
        empirical='empirical',
        figures='characters',
        landmarks='locations',
        nature='nature',
        games='video_games_series_movies',
        wars='war',
    )

    # dict.get yields None for unknown keys, which is the "unsupported" signal.
    return internal_names.get(topic)