Spaces:

idkash1
/

detect-edits-in-ai-generated-text

Sleeping

File size: 4,128 Bytes

80dcd43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09035e3
9e10878
09035e3
80dcd43
dab9f5f
 
a969752
80dcd43
 
 
 
7798d42
 
80dcd43
 
 
 
6cca969
7798d42
80dcd43
 
 
6cca969
80dcd43
656f70b
7798d42
 
63d13af
6cca969
80dcd43
 
 
 
 
 
 
 
 
 
926e64b
 
 
 
 
 
7798d42
 
926e64b
7798d42
 
 
56c7adc
974c9e1
 
dab9f5f
7798d42
 
 
 
 
6cca969
07cb8d2
7798d42
 
 
 
 
6cca969
7798d42
 
 
6b749ba
7798d42
 
 
974c9e1
 
 
dab9f5f
7798d42
 
 
 
 
 
 
 
 
80dcd43
6cca969
80dcd43
 
 
 
 
 
6cca969
 
80dcd43
 
 
 
6cca969
 
80dcd43
 
 
 
 
c14636f
 
 
 
 
 
80dcd43
 
c79a37e

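# Setup notes
#
# References:
#   https://www.freecodecamp.org/news/how-to-setup-virtual-environments-in-python/
#   https://www.youtube.com/watch?v=qbLc5a9jdXo&ab_channel=CalebCurry
#   https://stackoverflow.com/questions/26368306/export-is-not-recognized-as-an-internal-or-external-command
#
# Create and activate a virtual environment:
#   python3 -m venv .venv
#   source .venv/bin/activate
#
# Record the installed packages:
#   pip freeze > requirements.txt
#
# Point Flask at the application module (pick the line for your shell):
#   $env:FLASK_APP="application.py"    # PowerShell
#   set FLASK_APP=application.py       # Windows cmd
#   export FLASK_APP=application.py    # bash / zsh
#
# Select the Flask environment:
#   set FLASK_ENV=development          # Windows cmd
#   export FLASK_ENV=production        # bash / zsh
#
# Run the server:
#   flask run
#   flask run --host=0.0.0.0           # listen on all interfaces
#
# Additional dependency:
#   pip install torchvision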

from flask import Flask, request, jsonify
from flask_cors import CORS
import pandas
import threading
import uuid
import time
from human_text_detect import detect_human_text
import os

# Token read from the environment (None when the variable is unset). At
# present it is referenced only by the commented-out authorization checks in
# the route handlers.
HF_TOKEN = os.getenv("HF_TOKEN")

app = Flask(__name__)
# Enable flask_cors for the whole application with its default settings.
CORS(app)

# In-memory store of analysis outcomes, keyed by task id. Entries are written
# by process_analysis and removed by get_results when they are fetched.
task_results = {}

@app.route('/')
def index():
    """Answer requests to the root URL with a fixed plain-text greeting."""
    greeting = 'Hello'
    return greeting

def process_analysis(task_id, text, model_name, topic, threshold):
    """Analyse one submitted text and publish the outcome in ``task_results``.

    Used as the target of the worker thread started by ``check_text``. An
    entry is always stored under ``task_id``, even when the analysis raises,
    so a client polling for the result is never left on "pending" forever:

    * ``{'status': 'error', 'error': <message>}`` when validation fails or
      the analysis raises an exception;
    * ``{'status': 'completed', 'message': ..., 'hcRelativeToThreshold': ...,
      'HC': ..., 'sentences': [...]}`` on success.

    Args:
        task_id: Key under which the outcome is stored.
        text: Text to analyse.
        model_name: Model identifier; checked by ``validate_data``.
        topic: Public topic name; translated by ``check_topic`` before use.
        threshold: Passed unchanged to ``detect_human_text``.
    """
    print(f"Processing task: {task_id}")

    # Sentences whose p-value is below this cutoff are reported as potential
    # edits and given a highlight colour.
    pvalue_cutoff = 0.05

    try:
        # Validate data
        print('Validate data')
        answer = validate_data(text, model_name, topic)
        if answer != '':
            task_results[task_id] = {'status': 'error', 'error': answer}
            return

        topic = check_topic(topic)
        hcRelativeToThreshold, hc, df_sentences = detect_human_text(model_name, topic, threshold, text)

        sentences = []
        potential_edits = 0
        for _, row in df_sentences.iterrows():
            flagged = row["pvalue"] < pvalue_cutoff
            if flagged:
                potential_edits += 1
            sentences.append({
                "sentence": row["sentence"],
                "lppt": row["response"],
                "pvalue": row["pvalue"],
                "color": "#f5aca4" if flagged else ""
            })

        if potential_edits > 0:
            message = f'We found {potential_edits} potential edits in the text'
        else:
            message = 'We couldn\'t find edits in the text'

        # Store the result
        task_results[task_id] = {
            'status': 'completed',
            'message': message,
            'hcRelativeToThreshold': hcRelativeToThreshold,
            'HC': hc,
            'sentences': sentences,
        }
    except Exception as exc:
        # Thread boundary: without this, the exception would end the thread
        # silently and the task would never get a result. Details are logged
        # server-side; the client only sees the exception type.
        print(f"Task {task_id} failed: {exc!r}")
        task_results[task_id] = {
            'status': 'error',
            'error': f'Analysis failed: {type(exc).__name__}',
        }

@app.route('/detectHumanInAIText/checkText', methods=['POST'])
def check_text():
    """Accept a text for analysis and start processing it in the background.

    The request body must be a JSON object; the keys ``text``, ``model``,
    ``topic`` and ``threshold`` are read from it (a missing key is passed on
    as ``None``). Field validation is done later by ``process_analysis``, so
    validation failures are reported through the stored task result rather
    than by this response.

    Returns:
        ``{'taskId': <uuid4 string>}`` with status 202 once the worker thread
        has been started, or ``{'error': ...}`` with status 400 when the body
        is not a JSON object.
    """
    # NOTE: token authentication is currently disabled. To re-enable it:
    # received_token = request.headers.get("Authorization")
    # if received_token != HF_TOKEN:
    #     return jsonify({"error": "Unauthorized"}), 403  # Forbidden

    # Get data
    print('Get data')
    data = request.get_json()
    if not isinstance(data, dict):
        # e.g. a JSON array or null: there are no fields to read.
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('text')
    model_name = data.get('model')
    topic = data.get('topic')
    threshold = data.get('threshold')

    # Generate a unique taskId
    task_id = str(uuid.uuid4())

    # Start processing in a separate thread
    thread = threading.Thread(target=process_analysis, args=(task_id, text, model_name, topic, threshold))
    thread.start()

    # Return the generated taskId immediately so the client can poll for the
    # result (previously the Authorization header was returned here instead).
    return jsonify({'taskId': task_id}), 202

@app.route('/detectHumanInAIText/getAnalyzeResults', methods=['GET'])
def get_results():
    """Return the stored outcome of a task, removing it from ``task_results``.

    Reads the ``taskId`` query parameter and responds with:

    * 400 and an error message when the parameter is missing or empty;
    * 202 and ``{'status': 'pending'}`` when nothing is stored for that id;
    * 200 and the stored result otherwise (the entry is then discarded, so a
      second request for the same id reports "pending").
    """
    # Token check is currently disabled:
    # received_token = request.headers.get("HF_TOKEN")
    # if received_token != HF_TOKEN:
    #     return jsonify({"error": "Unauthorized"}), 403  # Forbidden

    requested_id = request.args.get('taskId')
    if not requested_id:
        return jsonify({'error': 'Missing taskId parameter'}), 400

    if requested_id in task_results:
        outcome = task_results.pop(requested_id)
        return jsonify(outcome), 200

    return jsonify({'status': 'pending'}), 202

def validate_data(text, model_name, topic):
    """Check the request fields and describe the first problem found.

    The checks run in this order: presence of ``text``, ``model_name`` and
    ``topic`` (``None`` or the empty string counts as missing), then whether
    the model is one of the supported names, then whether ``check_topic``
    recognises the topic.

    Returns:
        An error message for the first failing check, or ``''`` when every
        check passes.
    """
    required_fields = (
        (text, 'Text is missing'),
        (model_name, 'Model name is missing'),
        (topic, 'Topic is missing'),
    )
    for value, complaint in required_fields:
        if value is None or value == '':
            return complaint

    supported_models = ('GPT2XL', 'PHI2')
    if model_name not in supported_models:
        return f'Model {model_name} not supported'

    if check_topic(topic) is None:
        return f'Topic {topic} not supported'

    return ''

def check_topic(topic):
    """Translate a public topic name into the name passed on to the detector.

    Returns:
        The mapped name for a known topic, or ``None`` when ``topic`` is not
        one of the supported keys.
    """
    internal_names = {
        'empirical': 'empirical',
        'figures': 'characters',
        'landmarks': 'locations',
        'nature': 'nature',
        'games': 'video_games_series_movies',
        'wars': 'war',
    }
    return internal_names.get(topic)