Spaces:

idkash1
/

detect-edits-in-ai-generated-text

Sleeping

App Files Files Community

detect-edits-in-ai-generated-text / app.py

idkash1

Update app.py

2201e16 verified 3 months ago

raw

history blame contribute delete

3.56 kB

	import hmac
	import os
	import threading
	import time
	import uuid

	import pandas
	from flask import Flask, request, jsonify
	from flask_cors import CORS

	from human_text_detect import detect_human_text

	# Shared secret read from the HF_TOKEN environment variable; both API
	# endpoints compare the request's Authorization header against it.
	# NOTE(review): os.getenv returns None when the variable is unset -- confirm
	# the deployment always provides HF_TOKEN.
	HF_TOKEN = os.getenv("HF_TOKEN")

	# Flask application object; CORS(app) applies flask_cors to it.
	app = Flask(__name__)
	CORS(app)

	# Maps task id -> result dict. process_analysis stores an entry when a task
	# completes or fails validation; get_results removes the entry when it is
	# fetched, so results that are never fetched stay here.
	task_results = {}

	@app.route('/')
	def index():
	    """Answer requests to the root URL with a fixed greeting."""
	    greeting = 'Hello'
	    return greeting

	def process_analysis(task_id, text, model_name, topic, threshold):
	    """Run the edit-detection analysis for one task and store its outcome.

	    Started in a background thread by ``check_text``. Whatever happens, an
	    entry is written to ``task_results[task_id]`` so that a client polling
	    for the result never waits forever:

	    * validation failure -> ``{'status': 'error', 'error': <message>}``
	    * unexpected exception -> ``{'status': 'error', 'error': <generic text>}``
	    * success -> ``{'status': 'completed', 'message', 'hcRelativeToThreshold',
	      'HC', 'sentences'}``

	    Args:
	        task_id: Key under which the outcome is stored in ``task_results``.
	        text: Text to analyse.
	        model_name: Model identifier, checked by ``validate_data``.
	        topic: Client-facing topic name, translated by ``check_topic``.
	        threshold: Passed through unchanged to ``detect_human_text``.
	    """
	    # Sentences whose p-value falls below this are reported as potential edits.
	    significance_level = 0.05

	    print(f"Processing task: {task_id}")

	    try:
	        # Validate data
	        print('Validate data')
	        answer = validate_data(text, model_name, topic)
	        if answer != '':
	            task_results[task_id] = {'status': 'error', 'error': answer}
	            return

	        topic = check_topic(topic)
	        hcRelativeToThreshold, hc, df_sentences = detect_human_text(model_name, topic, threshold, text)

	        sentences = [
	            {
	                "sentence": row["sentence"],
	                "lppt": row["response"],
	                "pvalue": row["pvalue"],
	                "color": "#f5aca4" if row["pvalue"] < significance_level else "",
	            }
	            for _, row in df_sentences.iterrows()
	        ]

	        edit_count = sum(1 for sentence in sentences if sentence['pvalue'] < significance_level)
	        if edit_count > 0:
	            message = f'We found {edit_count} potential edits in the text'
	        else:
	            message = 'We couldn\'t find edits in the text'

	        # Store the result
	        task_results[task_id] = {
	            'status': 'completed',
	            'message': message,
	            'hcRelativeToThreshold': hcRelativeToThreshold,
	            'HC': hc,
	            'sentences': sentences,
	        }
	    except Exception as exc:
	        # This is the top of a worker thread: an uncaught exception would
	        # leave no entry in task_results and the client would see "pending"
	        # indefinitely. Log the details server-side and report a generic
	        # failure to the client.
	        print(f"Task {task_id} failed: {exc!r}")
	        task_results[task_id] = {'status': 'error', 'error': 'Analysis failed due to an internal error'}

	@app.route('/detectHumanInAIText/checkText', methods=['POST'])
	def check_text():
	    """Accept a text-analysis request and start it in a background thread.

	    The request must carry an ``Authorization`` header equal to ``HF_TOKEN``
	    and a JSON object body from which ``text``, ``model``, ``topic`` and
	    ``threshold`` are read.

	    Returns:
	        ``({'taskId': <uuid>}, 202)`` once the task is started,
	        ``({'error': 'Unauthorized'}, 403)`` when the token check fails,
	        ``({'error': ...}, 400)`` when the JSON body is not an object.
	    """
	    received_token = request.headers.get("Authorization")
	    # Reject when either side is missing: with HF_TOKEN unset, a request
	    # without an Authorization header would otherwise compare None to None
	    # and be let through. compare_digest avoids leaking the token via timing;
	    # both sides are encoded because it rejects non-ASCII str arguments.
	    if not HF_TOKEN or not received_token or not hmac.compare_digest(
	        received_token.encode("utf-8"), HF_TOKEN.encode("utf-8")
	    ):
	        return jsonify({"error": "Unauthorized"}), 403  # Forbidden

	    # Get data
	    print('Get data')
	    data = request.get_json()
	    # A body such as `null` or `[...]` parses fine but has no .get().
	    if not isinstance(data, dict):
	        return jsonify({'error': 'Request body must be a JSON object'}), 400

	    text = data.get('text')
	    model_name = data.get('model')
	    topic = data.get('topic')
	    threshold = data.get('threshold')

	    # Generate a unique taskId
	    task_id = str(uuid.uuid4())

	    # Start processing in a separate thread
	    thread = threading.Thread(target=process_analysis, args=(task_id, text, model_name, topic, threshold))
	    thread.start()

	    # Return taskId immediately
	    return jsonify({'taskId': task_id}), 202

	@app.route('/detectHumanInAIText/getAnalyzeResults', methods=['GET'])
	def get_results():
	    """Return the stored outcome of a task, removing it from ``task_results``.

	    The request must carry an ``Authorization`` header equal to ``HF_TOKEN``
	    and a ``taskId`` query parameter.

	    Returns:
	        ``(<result dict>, 200)`` when an outcome is stored for the task,
	        ``({'status': 'pending'}, 202)`` when none is stored,
	        ``({'error': 'Missing taskId parameter'}, 400)`` without ``taskId``,
	        ``({'error': 'Unauthorized'}, 403)`` when the token check fails.
	    """
	    received_token = request.headers.get("Authorization")
	    # Reject when either side is missing: with HF_TOKEN unset, a request
	    # without an Authorization header would otherwise compare None to None
	    # and be let through. compare_digest avoids leaking the token via timing;
	    # both sides are encoded because it rejects non-ASCII str arguments.
	    if not HF_TOKEN or not received_token or not hmac.compare_digest(
	        received_token.encode("utf-8"), HF_TOKEN.encode("utf-8")
	    ):
	        return jsonify({"error": "Unauthorized"}), 403  # Forbidden

	    task_id = request.args.get('taskId')

	    if not task_id:
	        return jsonify({'error': 'Missing taskId parameter'}), 400

	    # Single pop instead of "in" followed by pop: two concurrent requests for
	    # the same task can no longer race into a KeyError.
	    result = task_results.pop(task_id, None)
	    if result is None:
	        return jsonify({'status': 'pending'}), 202

	    return jsonify(result), 200

	def validate_data(text, model_name, topic):
	    """Check the request fields and describe the first problem found.

	    Args:
	        text: Text to analyse; must not be ``None`` or empty.
	        model_name: Must be ``'GPT2XL'`` or ``'PHI2'``.
	        topic: Must be a string that ``check_topic`` maps to a value.

	    Returns:
	        An error message, or ``''`` when every field is acceptable.
	    """
	    supported_models = ('GPT2XL', 'PHI2')

	    if text is None or text == '':
	        return 'Text is missing'

	    if model_name is None or model_name == '':
	        return 'Model name is missing'

	    if topic is None or topic == '':
	        return 'Topic is missing'

	    if model_name not in supported_models:
	        return f'Model {model_name} not supported'

	    # The fields come from client JSON, so topic may be a list or dict.
	    # Reject non-strings here rather than letting the topic lookup raise.
	    if not isinstance(topic, str) or check_topic(topic) is None:
	        return f'Topic {topic} not supported'

	    return ''

	def check_topic(topic):
	    """Translate a client-facing topic name into its internal name.

	    Args:
	        topic: Topic name as sent by the client.

	    Returns:
	        The mapped name, or ``None`` when the topic is not one of the
	        known keys (including values that cannot be dict keys at all).
	    """
	    topic_dict = {
	        'empirical': 'empirical',
	        'figures': 'characters',
	        'landmarks': 'locations',
	        'nature': 'nature',
	        'games': 'video_games_series_movies',
	        'wars': 'war',
	    }

	    try:
	        return topic_dict.get(topic)
	    except TypeError:
	        # Unhashable input (e.g. a list or dict taken from request JSON)
	        # cannot be looked up; treat it as an unknown topic.
	        return None