# app.py - 文本检测多模型集成系统
"""Multi-model ensemble for detecting AI-generated text.

Loads several Hugging Face text-classification detectors, combines their
predictions with a weighted vote, and serves the result as JSON through a
Gradio interface.
"""
import gradio as gr
from transformers import pipeline
import numpy as np
import re

# 加载多个检测模型
# Detector registry: each entry carries the HF model id, the pipeline slot
# (filled at startup, None if loading failed) and its weight in the vote.
# Weights sum to 1.0 so the combined score stays in [0, 1].
models = {
    "model1": {
        # NOTE(review): this repo is an ONNX export — confirm it loads via the
        # vanilla transformers pipeline in the deployment environment.
        "name": "Xenova/distilbert-base-ai-generated-text-detection",
        "detector": None,
        "weight": 0.4,
    },
    "model2": {
        "name": "Hello-SimpleAI/chatgpt-detector-roberta",
        "detector": None,
        "weight": 0.3,
    },
    "model3": {
        "name": "roberta-base-openai-detector",
        "detector": None,
        "weight": 0.3,
    },
}

# 初始化模型 — best-effort: a model that fails to download/load stays None and
# is simply excluded from the ensemble instead of crashing startup.
for key in models:
    try:
        models[key]["detector"] = pipeline(
            "text-classification", model=models[key]["name"]
        )
        print(f"成功加载模型: {models[key]['name']}")
    except Exception as e:
        print(f"加载模型 {models[key]['name']} 失败: {str(e)}")
        models[key]["detector"] = None


def analyze_text_features(text):
    """Compute simple stylometric statistics for *text*.

    Args:
        text: The input string to profile.

    Returns:
        dict with length, word, sentence, lexical-diversity and punctuation
        metrics. Every ratio guards against division by zero.
    """
    # 文本特征分析
    features = {}
    features["length"] = len(text)
    words = text.split()
    features["word_count"] = len(words)
    features["avg_word_length"] = sum(len(word) for word in words) / max(1, len(words))
    features["unique_words_ratio"] = len(set(text.lower().split())) / max(1, len(words))

    # 句子分析 — drop the empty fragments re.split() produces around trailing
    # punctuation; they previously inflated sentence_count and deflated
    # avg_sentence_length.
    sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
    features["sentence_count"] = len(sentences)
    features["avg_sentence_length"] = sum(
        len(s.split()) for s in sentences
    ) / max(1, len(sentences))

    # 词汇多样性 (type/token ratio; only meaningful when there are words)
    if len(words) > 0:
        features["lexical_diversity"] = len(set(words)) / len(words)

    # 标点符号比例
    punctuation_count = sum(1 for char in text if char in ",.!?;:\"'()[]{}")
    features["punctuation_ratio"] = punctuation_count / max(1, len(text))

    return features


def detect_ai_text(text):
    """Run every available detector on *text* and combine the verdicts.

    Args:
        text: Candidate text; must be at least 50 characters after stripping.

    Returns:
        dict with the weighted "ai_probability", a human-readable
        "confidence_level", per-model results and the stylometric
        "features" — or a dict with an "error" key when detection is
        impossible (text too short, or no model produced a result).
    """
    if not text or len(text.strip()) < 50:
        return {"error": "文本太短,无法可靠检测"}

    results = {}
    valid_models = 0
    weighted_ai_probability = 0
    # Sum of weights of the models that actually produced a prediction;
    # used for normalization below.
    contributing_weight = 0.0

    # 使用每个模型进行预测
    for key, model_info in models.items():
        if model_info["detector"] is None:
            continue
        try:
            result = model_info["detector"](text)
            # 提取结果
            label = result[0]["label"]
            score = result[0]["score"]

            # 确定AI生成概率: map the model-specific label onto "probability
            # the text is AI-generated". "fake" covers the Real/Fake label
            # scheme used by roberta-base-openai-detector, which the previous
            # keyword list missed — its AI verdicts were being inverted.
            lowered = label.lower()
            if any(kw in lowered for kw in ("ai", "chatgpt", "generated", "fake")):
                ai_probability = score
            else:
                ai_probability = 1 - score

            # 添加到结果
            results[key] = {
                "model_name": model_info["name"],
                "ai_probability": ai_probability,
                "label": label,
                "score": score,
            }

            # 累加加权概率
            weighted_ai_probability += ai_probability * model_info["weight"]
            contributing_weight += model_info["weight"]
            valid_models += 1
        except Exception as e:
            results[key] = {
                "model_name": model_info["name"],
                "error": str(e),
            }

    # 计算最终加权概率 — normalize by the weight that actually voted. The old
    # code divided by max(total_weight, 1): because the weights sum to 1.0,
    # any load failure silently deflated the final probability, and models
    # that failed at inference time were still counted in the denominator.
    if valid_models == 0:
        return {
            "error": "没有可用的检测模型",
            "individual_model_results": results,
        }
    final_ai_probability = weighted_ai_probability / contributing_weight

    # 分析文本特征
    text_features = analyze_text_features(text)

    # 确定置信度级别
    if final_ai_probability > 0.7:
        confidence_level = "高概率AI生成"
    elif final_ai_probability < 0.3:
        confidence_level = "高概率人类创作"
    else:
        confidence_level = "无法确定"

    # 构建最终结果
    final_result = {
        "ai_probability": final_ai_probability,
        "confidence_level": confidence_level,
        "individual_model_results": results,
        "features": text_features,
    }

    return final_result


# 创建Gradio界面
iface = gr.Interface(
    fn=detect_ai_text,
    inputs=gr.Textbox(lines=10, placeholder="粘贴要检测的文本..."),
    outputs=gr.JSON(),
    title="增强型AI文本检测API",
    description="多模型集成检测文本是否由AI生成",
    examples=[
        ["这是一段示例文本,用于测试AI文本检测功能。请输入至少50个字符的文本以获得准确的检测结果。"]
    ],
    allow_flagging="never",
)

# Launch only when run as a script, so the module stays importable.
if __name__ == "__main__":
    iface.launch()