File size: 3,562 Bytes
80dcd43 09035e3 9e10878 09035e3 80dcd43 dab9f5f a969752 80dcd43 7798d42 80dcd43 6cca969 7798d42 80dcd43 6cca969 80dcd43 656f70b 7798d42 63d13af 6cca969 80dcd43 926e64b 7798d42 926e64b 7798d42 56c7adc 482e506 dab9f5f 7798d42 6cca969 07cb8d2 7798d42 6cca969 7798d42 2201e16 7798d42 482e506 dab9f5f 7798d42 80dcd43 6cca969 80dcd43 6cca969 80dcd43 6cca969 80dcd43 c14636f 80dcd43 c79a37e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# Standard library
import hmac
import os
import threading
import time
import uuid

# Third-party
from flask import Flask, request, jsonify
from flask_cors import CORS
import pandas

# Local
from human_text_detect import detect_human_text
HF_TOKEN = os.getenv("HF_TOKEN")
app = Flask(__name__)
CORS(app)
task_results = {}
@app.route('/')
def index():
return 'Hello'
def process_analysis(task_id, text, model_name, topic, threshold):
print(f"Processing task: {task_id}")
# Validate data
print('Validate data')
answer = validate_data(text, model_name, topic)
if answer != '':
task_results[task_id] = {'status': 'error', 'error': answer}
return
topic = check_topic(topic)
hcRelativeToThreshold, hc, df_sentences = detect_human_text(model_name, topic, threshold, text)
sentences = [
{
"sentence": row["sentence"],
"lppt": row["response"],
"pvalue": row["pvalue"],
"color": "#f5aca4" if row["pvalue"] < 0.05 else ""
}
for _, row in df_sentences.iterrows()
]
foundPotentialEdits = [sentence for sentence in sentences if sentence['pvalue'] < 0.05]
message = f'We found {len(foundPotentialEdits)} potential edits in the text' if len(foundPotentialEdits) > 0 else 'We couldn\'t find edits in the text'
# Store the result
task_results[task_id] = {'status': 'completed', 'message': message, 'hcRelativeToThreshold': hcRelativeToThreshold, 'HC': hc, 'sentences': sentences}
@app.route('/detectHumanInAIText/checkText', methods=['POST'])
def check_text():
received_token = request.headers.get("Authorization")
if received_token != HF_TOKEN:
return jsonify({"error": "Unauthorized"}), 403 # Forbidden
# Get data
print('Get data')
data = request.get_json()
text = data.get('text')
model_name = data.get('model')
topic = data.get('topic')
threshold = data.get('threshold')
# Generate a unique taskId
task_id = str(uuid.uuid4())
# Start processing in a separate thread
thread = threading.Thread(target=process_analysis, args=(task_id, text, model_name, topic, threshold))
thread.start()
# Return taskId immediately
return jsonify({'taskId': task_id}), 202
@app.route('/detectHumanInAIText/getAnalyzeResults', methods=['GET'])
def get_results():
received_token = request.headers.get("Authorization")
if received_token != HF_TOKEN:
return jsonify({"error": "Unauthorized"}), 403 # Forbidden
task_id = request.args.get('taskId')
if not task_id:
return jsonify({'error': 'Missing taskId parameter'}), 400
if task_id not in task_results:
return jsonify({'status': 'pending'}), 202
return jsonify(task_results.pop(task_id)), 200
def validate_data(text, model_name, topic):
if text is None or text == '':
return 'Text is missing'
if model_name is None or model_name == '':
return 'Model name is missing'
if topic is None or topic == '':
return 'Topic is missing'
if model_name not in ['GPT2XL', 'PHI2']:
return f'Model {model_name} not supported'
if check_topic(topic) == None:
return f'Topic {topic} not supported'
return ''
def check_topic(topic):
topic_dict = {
'empirical': 'empirical',
'figures': 'characters',
'landmarks': 'locations',
'nature': 'nature',
'games': 'video_games_series_movies',
'wars': 'war'
}
return topic_dict[topic] if topic in topic_dict else None |