File size: 3,562 Bytes
80dcd43
 
 
09035e3
9e10878
09035e3
80dcd43
dab9f5f
 
a969752
80dcd43
 
 
 
7798d42
 
80dcd43
 
 
 
6cca969
7798d42
80dcd43
 
 
6cca969
80dcd43
656f70b
7798d42
 
63d13af
6cca969
80dcd43
 
 
 
 
 
 
 
 
 
926e64b
 
 
 
 
 
7798d42
 
926e64b
7798d42
 
 
56c7adc
482e506
 
dab9f5f
7798d42
 
 
 
 
6cca969
07cb8d2
7798d42
 
 
 
 
6cca969
7798d42
 
 
2201e16
7798d42
 
 
482e506
 
 
dab9f5f
7798d42
 
 
 
 
 
 
 
 
80dcd43
6cca969
80dcd43
 
 
 
 
 
6cca969
 
80dcd43
 
 
 
6cca969
 
80dcd43
 
 
 
 
c14636f
 
 
 
 
 
80dcd43
 
c79a37e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import hmac
import os
import threading
import time
import uuid

import pandas
from flask import Flask, request, jsonify
from flask_cors import CORS

from human_text_detect import detect_human_text

# Shared secret that the protected routes compare against the incoming
# "Authorization" header. Read once at import time from the HF_TOKEN
# environment variable; os.getenv yields None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Flask application object; the route decorators below register against it.
app = Flask(__name__)
# Wrap the app with flask_cors using its default settings.
CORS(app)

# In-memory store of finished analyses, keyed by task id (a UUID string).
# Written by process_analysis (which check_text runs in a background thread)
# and consumed by get_results, which removes an entry when it returns it.
# Entries that are never polled stay here for the life of the process.
task_results = {}

@app.route('/')
def index():
    """Answer requests to the root URL with a fixed greeting."""
    greeting = 'Hello'
    return greeting

def process_analysis(task_id, text, model_name, topic, threshold):
    """Run the detection for one task and publish the outcome in ``task_results``.

    Meant to be used as a background-thread target. Whatever happens, exactly
    one entry is stored under ``task_results[task_id]`` so a client polling
    for the task always gets a final answer:

    * ``{'status': 'error', 'error': <message>}`` when validation fails or the
      analysis raises;
    * ``{'status': 'completed', 'message', 'hcRelativeToThreshold', 'HC',
      'sentences'}`` on success.

    Args:
        task_id: Key under which the outcome is stored.
        text: Text to analyse.
        model_name: Model identifier; checked by ``validate_data``.
        topic: Public topic name; translated by ``check_topic`` before it is
            passed to ``detect_human_text``.
        threshold: Forwarded unchanged to ``detect_human_text``.
    """
    # Sentences whose p-value falls below this cutoff are flagged as edits.
    pvalue_cutoff = 0.05

    print(f"Processing task: {task_id}")

    try:
        # Validate data
        print('Validate data')
        answer = validate_data(text, model_name, topic)
        if answer != '':
            task_results[task_id] = {'status': 'error', 'error': answer}
            return

        topic = check_topic(topic)
        hcRelativeToThreshold, hc, df_sentences = detect_human_text(model_name, topic, threshold, text)

        sentences = []
        edit_count = 0
        for _, row in df_sentences.iterrows():
            flagged = row["pvalue"] < pvalue_cutoff
            if flagged:
                edit_count += 1
            sentences.append({
                "sentence": row["sentence"],
                "lppt": row["response"],
                "pvalue": row["pvalue"],
                "color": "#f5aca4" if flagged else "",
            })

        if edit_count > 0:
            message = f'We found {edit_count} potential edits in the text'
        else:
            message = 'We couldn\'t find edits in the text'

        # Store the result
        task_results[task_id] = {
            'status': 'completed',
            'message': message,
            'hcRelativeToThreshold': hcRelativeToThreshold,
            'HC': hc,
            'sentences': sentences,
        }
    except Exception as exc:
        # This is the top of a worker thread: an uncaught exception would end
        # the thread without recording anything, and the client would keep
        # receiving "pending" forever. Log the details server-side and
        # publish a generic error instead.
        print(f"Task {task_id} failed: {exc!r}")
        task_results[task_id] = {
            'status': 'error',
            'error': 'Analysis failed due to an internal error',
        }

@app.route('/detectHumanInAIText/checkText', methods=['POST'])
def check_text():
    """Accept a text-analysis request and start it in a background thread.

    The request must carry an ``Authorization`` header equal to ``HF_TOKEN``
    and a JSON object body with the keys ``text``, ``model``, ``topic`` and
    ``threshold``. The field values are not checked here; they are passed to
    ``process_analysis``, which reports problems through the task result.

    Returns:
        * 403 ``{"error": "Unauthorized"}`` when the header is missing or
          wrong, or when no ``HF_TOKEN`` is configured on the server.
        * 400 when the body is not a JSON object.
        * 202 ``{"taskId": <uuid>}`` once the worker thread has been started.
    """
    received_token = request.headers.get("Authorization")
    # Fail closed: with HF_TOKEN unset and no header sent, a plain `!=` would
    # compare None with None and let the request through. compare_digest keeps
    # the comparison time independent of where the strings differ; comparing
    # bytes avoids its TypeError on non-ASCII str input.
    if (
        not HF_TOKEN
        or not received_token
        or not hmac.compare_digest(received_token.encode(), HF_TOKEN.encode())
    ):
        return jsonify({"error": "Unauthorized"}), 403  # Forbidden

    # Get data
    print('Get data')
    # silent=True yields None instead of raising on a missing or malformed
    # body; anything that is not a JSON object would break the .get() calls.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    text = data.get('text')
    model_name = data.get('model')
    topic = data.get('topic')
    threshold = data.get('threshold')

    # Generate a unique taskId
    task_id = str(uuid.uuid4())

    # Start processing in a separate thread
    thread = threading.Thread(target=process_analysis, args=(task_id, text, model_name, topic, threshold))
    thread.start()

    # Return taskId immediately
    return jsonify({'taskId': task_id}), 202

@app.route('/detectHumanInAIText/getAnalyzeResults', methods=['GET'])
def get_results():
    """Return the stored result for a task, removing it from ``task_results``.

    The request must carry an ``Authorization`` header equal to ``HF_TOKEN``
    and a ``taskId`` query parameter.

    Returns:
        * 403 ``{"error": "Unauthorized"}`` when the header is missing or
          wrong, or when no ``HF_TOKEN`` is configured on the server.
        * 400 when ``taskId`` is missing.
        * 202 ``{"status": "pending"}`` when no result is stored under that id
          (not finished yet, unknown id, or already fetched).
        * 200 with the stored result otherwise; the entry is deleted, so each
          result can be fetched only once.
    """
    received_token = request.headers.get("Authorization")
    # Fail closed: with HF_TOKEN unset and no header sent, a plain `!=` would
    # compare None with None and let the request through. compare_digest keeps
    # the comparison time independent of where the strings differ; comparing
    # bytes avoids its TypeError on non-ASCII str input.
    if (
        not HF_TOKEN
        or not received_token
        or not hmac.compare_digest(received_token.encode(), HF_TOKEN.encode())
    ):
        return jsonify({"error": "Unauthorized"}), 403  # Forbidden

    task_id = request.args.get('taskId')

    if not task_id:
        return jsonify({'error': 'Missing taskId parameter'}), 400

    # One pop with a default replaces the separate membership test and pop, so
    # two concurrent polls for the same id cannot hit a KeyError between them.
    result = task_results.pop(task_id, None)
    if result is None:
        return jsonify({'status': 'pending'}), 202

    return jsonify(result), 200

def validate_data(text, model_name, topic):
    """Check the user-supplied analysis parameters.

    Checks run in a fixed order and the first failure wins: missing text,
    non-string text, missing model, missing topic, unsupported model,
    unsupported topic.

    Args:
        text: Text to analyse; must be a non-empty string.
        model_name: Must be ``'GPT2XL'`` or ``'PHI2'``.
        topic: Must be a topic name that ``check_topic`` recognises.

    Returns:
        An error message describing the first problem found, or ``''`` when
        every parameter is acceptable.
    """
    if text is None or text == '':
        return 'Text is missing'

    # Values come straight from a JSON body, so they may be numbers, lists, ...
    if not isinstance(text, str):
        return 'Text must be a string'

    if model_name is None or model_name == '':
        return 'Model name is missing'

    if topic is None or topic == '':
        return 'Topic is missing'

    if model_name not in ['GPT2XL', 'PHI2']:
        return f'Model {model_name} not supported'

    # Test the type first: an unhashable topic (e.g. a list) would raise
    # TypeError in the dictionary lookup done by check_topic.
    if not isinstance(topic, str) or check_topic(topic) is None:
        return f'Topic {topic} not supported'

    return ''

# Public topic names accepted from clients, mapped to the internal topic
# identifiers handed to the detector. Built once instead of on every call.
_TOPIC_ALIASES = {
    'empirical': 'empirical',
    'figures': 'characters',
    'landmarks': 'locations',
    'nature': 'nature',
    'games': 'video_games_series_movies',
    'wars': 'war',
}


def check_topic(topic):
    """Translate a public topic name into its internal identifier.

    Args:
        topic: Topic name as supplied by the client.

    Returns:
        The internal identifier, or ``None`` when ``topic`` is not a known
        topic name. Non-string values (including unhashable ones such as
        lists, which a plain dictionary lookup would reject with TypeError)
        also yield ``None``.
    """
    if not isinstance(topic, str):
        return None
    return _TOPIC_ALIASES.get(topic)