|
from flask import Flask, request, jsonify |
|
from flask_cors import CORS |
|
import pandas |
|
import threading |
|
import uuid |
|
import time |
|
from human_text_detect import detect_human_text |
|
import os |
|
|
|
# Shared secret that callers must send verbatim in the Authorization header.
# Read once at import time; it is None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# The Flask application, with flask_cors applied using its default settings.
app = Flask(__name__)
CORS(app)

# Finished analysis outcomes keyed by task id. Worker threads started by
# check_text write entries here; get_results removes an entry when it is
# returned to the caller.
task_results = dict()
|
|
|
@app.route('/')
def index():
    """Answer requests to the root URL with a fixed plain-text greeting."""
    greeting = 'Hello'
    return greeting
|
|
|
def process_analysis(task_id, text, model_name, topic, threshold):
    """Run the detection for one task and publish the outcome in ``task_results``.

    This is the target of the worker thread started by ``check_text``. It
    always stores exactly one entry under ``task_id``:

    * ``{'status': 'error', 'error': <message>}`` when ``validate_data``
      rejects the input or when any step raises, or
    * ``{'status': 'completed', 'message', 'hcRelativeToThreshold', 'HC',
      'sentences'}`` on success.

    Args:
        task_id: Key under which the outcome is stored.
        text: Text to analyse.
        model_name: Model identifier, checked by ``validate_data``.
        topic: Public topic name; translated with ``check_topic`` before it
            is handed to ``detect_human_text``.
        threshold: Passed through to ``detect_human_text`` unchanged.
    """
    # Sentences whose p-value falls below this level are highlighted and
    # counted as potential edits.
    significance = 0.05

    print(f"Processing task: {task_id}")

    # The whole job is guarded: an uncaught exception on this thread would
    # leave no entry for the task, and get_results would answer "pending"
    # for it indefinitely.
    try:
        print('Validate data')
        answer = validate_data(text, model_name, topic)
        if answer != '':
            task_results[task_id] = {'status': 'error', 'error': answer}
            return

        topic = check_topic(topic)
        hcRelativeToThreshold, hc, df_sentences = detect_human_text(model_name, topic, threshold, text)

        sentences = [
            {
                "sentence": row["sentence"],
                "lppt": row["response"],
                "pvalue": row["pvalue"],
                "color": "#f5aca4" if row["pvalue"] < significance else "",
            }
            for _, row in df_sentences.iterrows()
        ]

        edit_count = sum(1 for entry in sentences if entry["pvalue"] < significance)
        if edit_count > 0:
            message = f'We found {edit_count} potential edits in the text'
        else:
            message = "We couldn't find edits in the text"

        task_results[task_id] = {
            'status': 'completed',
            'message': message,
            'hcRelativeToThreshold': hcRelativeToThreshold,
            'HC': hc,
            'sentences': sentences,
        }
    except Exception as exc:
        # Thread boundary: report the failure to the polling client instead
        # of letting the thread die silently.
        print(f"Task {task_id} failed: {exc!r}")
        task_results[task_id] = {'status': 'error', 'error': f'Analysis failed: {exc}'}
|
|
|
@app.route('/detectHumanInAIText/checkText', methods=['POST'])
def check_text():
    """Accept an analysis request and start it on a background thread.

    The ``Authorization`` header must equal ``HF_TOKEN``. The JSON body is
    expected to be an object; its ``text``, ``model``, ``topic`` and
    ``threshold`` members are forwarded to ``process_analysis``.

    Returns:
        ``{'taskId': <uuid4 string>}`` with status 202 once the worker thread
        has been started, ``{'error': 'Unauthorized'}`` with status 403 when
        the token check fails, or an error with status 400 when the body is
        not a JSON object.
    """
    received_token = request.headers.get("Authorization")
    # Fail closed: if HF_TOKEN is unset it is None, and a request without an
    # Authorization header would otherwise compare None to None and pass.
    if not HF_TOKEN or received_token != HF_TOKEN:
        return jsonify({"error": "Unauthorized"}), 403

    print('Get data')
    # silent=True yields None for a missing or malformed body; the isinstance
    # check also rejects valid JSON that is not an object (null, list, ...),
    # on which the .get calls below would raise.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('text')
    model_name = data.get('model')
    topic = data.get('topic')
    threshold = data.get('threshold')

    task_id = str(uuid.uuid4())

    # Field validation happens inside the worker, so a bad field surfaces as
    # an error result from the polling endpoint rather than here.
    worker = threading.Thread(
        target=process_analysis,
        args=(task_id, text, model_name, topic, threshold),
    )
    worker.start()

    return jsonify({'taskId': task_id}), 202
|
|
|
@app.route('/detectHumanInAIText/getAnalyzeResults', methods=['GET'])
def get_results():
    """Return the stored outcome for the task named by the ``taskId`` query parameter.

    The ``Authorization`` header must equal ``HF_TOKEN``.

    Returns:
        The stored result with status 200 (the entry is removed, so it can be
        fetched only once), ``{'status': 'pending'}`` with status 202 when no
        entry exists for the id, status 400 when ``taskId`` is missing, or
        status 403 when the token check fails.
    """
    received_token = request.headers.get("Authorization")
    # Fail closed: if HF_TOKEN is unset it is None, and a request without an
    # Authorization header would otherwise compare None to None and pass.
    if not HF_TOKEN or received_token != HF_TOKEN:
        return jsonify({"error": "Unauthorized"}), 403

    task_id = request.args.get('taskId')
    if not task_id:
        return jsonify({'error': 'Missing taskId parameter'}), 400

    # One pop with a default replaces "check membership, then pop": two
    # concurrent requests for the same task can no longer hit a KeyError.
    # Stored results are always dicts, so None safely means "no entry".
    # NOTE: an id that was never issued is indistinguishable from a task
    # that is still running; both are reported as pending.
    result = task_results.pop(task_id, None)
    if result is None:
        return jsonify({'status': 'pending'}), 202

    return jsonify(result), 200
|
|
|
def validate_data(text, model_name, topic):
    """Check the request fields and describe the first problem found.

    Checks run in this order: ``text``, ``model_name`` and ``topic`` must each
    be neither None nor the empty string; ``model_name`` must be one of the
    supported models; ``topic`` must be a name known to ``check_topic``.

    Args:
        text: Text submitted for analysis.
        model_name: Requested model identifier.
        topic: Requested public topic name.

    Returns:
        An error message for the first failing check, or ``''`` when every
        check passes.
    """
    required = (
        (text, 'Text'),
        (model_name, 'Model name'),
        (topic, 'Topic'),
    )
    for value, label in required:
        if value is None or value == '':
            return f'{label} is missing'

    if model_name not in ('GPT2XL', 'PHI2'):
        return f'Model {model_name} not supported'

    # Topics arrive from JSON, so they may be lists or objects. Those are
    # unhashable and would raise inside the dictionary lookup; reject every
    # non-string before consulting check_topic.
    if not isinstance(topic, str) or check_topic(topic) is None:
        return f'Topic {topic} not supported'

    return ''
|
|
|
# Public topic names accepted in requests, mapped to the internal topic
# identifiers that are passed on to the detector.
_TOPIC_ALIASES = {
    'empirical': 'empirical',
    'figures': 'characters',
    'landmarks': 'locations',
    'nature': 'nature',
    'games': 'video_games_series_movies',
    'wars': 'war',
}


def check_topic(topic):
    """Translate a public topic name into its internal identifier.

    Args:
        topic: Topic name as supplied by the client.

    Returns:
        The internal identifier for a known topic, or None when the topic is
        unknown. Non-string input (including unhashable values such as lists
        or dicts, which cannot be used as dictionary keys) also yields None
        rather than raising.
    """
    if not isinstance(topic, str):
        return None
    return _TOPIC_ALIASES.get(topic)