File size: 4,487 Bytes
2e3fe2e
5d40c09
7aaa31b
2e3fe2e
 
5d40c09
2e3fe2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d40c09
7aaa31b
 
 
5d40c09
2e3fe2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d40c09
2e3fe2e
 
5d40c09
2e3fe2e
 
7aaa31b
2e3fe2e
 
 
 
 
 
 
5d40c09
2e3fe2e
 
 
 
 
 
5d40c09
 
2e3fe2e
5d40c09
 
 
7aaa31b
 
5d40c09
2e3fe2e
 
 
 
 
 
5d40c09
 
 
2e3fe2e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# app.py - Multi-model ensemble system for AI-generated text detection
import gradio as gr
from transformers import pipeline
import numpy as np
import re

# Ensemble registry: each entry holds the Hugging Face model id, a
# detector slot (filled in at startup, None until/if loading succeeds),
# and the model's weight in the weighted-average vote.
models = {
    f"model{index}": {"name": model_name, "detector": None, "weight": weight}
    for index, (model_name, weight) in enumerate(
        [
            ("Xenova/distilbert-base-ai-generated-text-detection", 0.4),
            ("Hello-SimpleAI/chatgpt-detector-roberta", 0.3),
            ("roberta-base-openai-detector", 0.3),
        ],
        start=1,
    )
}

# Eagerly instantiate every detection pipeline at import time. A failed
# load leaves that entry's detector as None so the ensemble skips it.
for key, info in models.items():
    model_name = info["name"]
    try:
        info["detector"] = pipeline("text-classification", model=model_name)
    except Exception as e:
        print(f"加载模型 {model_name} 失败: {str(e)}")
        info["detector"] = None
    else:
        print(f"成功加载模型: {model_name}")

def analyze_text_features(text):
    """Compute simple stylometric features of *text*.

    Parameters
    ----------
    text : str
        Text to analyze; may be empty.

    Returns
    -------
    dict
        Always contains the keys: ``length``, ``word_count``,
        ``avg_word_length``, ``unique_words_ratio``, ``sentence_count``,
        ``avg_sentence_length``, ``lexical_diversity``,
        ``punctuation_ratio``.
    """
    features = {}
    features["length"] = len(text)

    # Word-level statistics (whitespace tokenization).
    words = text.split()
    features["word_count"] = len(words)
    features["avg_word_length"] = sum(len(word) for word in words) / max(1, len(words))
    features["unique_words_ratio"] = len(set(text.lower().split())) / max(1, len(words))

    # Sentence analysis. BUG FIX: re.split yields empty fragments around
    # leading/trailing terminators (e.g. "Hi." -> ["Hi", ""]), which
    # previously inflated sentence_count and skewed the average; drop
    # blank fragments before counting.
    sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
    features["sentence_count"] = len(sentences)
    features["avg_sentence_length"] = sum(len(s.split()) for s in sentences) / max(1, len(sentences))

    # Lexical diversity. BUG FIX: the key was previously omitted for
    # empty input, which could raise KeyError in consumers; default 0.0.
    features["lexical_diversity"] = len(set(words)) / len(words) if words else 0.0

    # Punctuation density over the raw character count.
    punctuation_count = sum(1 for char in text if char in ",.!?;:\"'()[]{}")
    features["punctuation_ratio"] = punctuation_count / max(1, len(text))

    return features

def detect_ai_text(text):
    """Run the multi-model ensemble on *text* and aggregate a verdict.

    Parameters
    ----------
    text : str
        The text to classify; must be at least 50 characters after
        stripping whitespace.

    Returns
    -------
    dict
        On success: ``ai_probability`` (weighted average over the models
        that ran), ``confidence_level``, ``individual_model_results``,
        and ``features`` (see :func:`analyze_text_features`).
        On failure: a dict with an ``error`` key.
    """
    # Guard: very short texts cannot be classified reliably.
    if not text or len(text.strip()) < 50:
        return {"error": "文本太短,无法可靠检测"}

    results = {}
    valid_models = 0
    weighted_ai_probability = 0.0
    contributing_weight = 0.0  # total weight of models that actually produced a score

    # Query each loaded model independently; a per-model failure is
    # recorded but does not abort the ensemble.
    for key, model_info in models.items():
        detector = model_info["detector"]
        if detector is None:
            continue
        try:
            result = detector(text)
            label = result[0]["label"]
            score = result[0]["score"]

            # Map the classifier label to P(AI-generated): labels that
            # mention AI/ChatGPT/generated are the positive class;
            # otherwise the label denotes human text, so invert.
            if any(tag in label.lower() for tag in ("ai", "chatgpt", "generated")):
                ai_probability = score
            else:
                ai_probability = 1 - score

            results[key] = {
                "model_name": model_info["name"],
                "ai_probability": ai_probability,
                "label": label,
                "score": score,
            }

            weighted_ai_probability += ai_probability * model_info["weight"]
            contributing_weight += model_info["weight"]
            valid_models += 1
        except Exception as e:
            results[key] = {
                "model_name": model_info["name"],
                "error": str(e),
            }

    # BUG FIX: previously, with zero usable models the probability
    # silently defaulted to 0 and the text was reported as human-written.
    # Surface an explicit error instead.
    if valid_models == 0:
        return {
            "error": "没有可用的检测模型",
            "individual_model_results": results,
        }

    # BUG FIX: normalize by the weight of the models that actually
    # produced a score (not merely those that loaded), so an
    # inference-time failure no longer drags the probability toward 0.
    final_ai_probability = weighted_ai_probability / contributing_weight

    # Auxiliary stylometric features for the report.
    text_features = analyze_text_features(text)

    # Map the probability onto a coarse三-way verdict.
    if final_ai_probability > 0.7:
        confidence_level = "高概率AI生成"
    elif final_ai_probability < 0.3:
        confidence_level = "高概率人类创作"
    else:
        confidence_level = "无法确定"

    final_result = {
        "ai_probability": final_ai_probability,
        "confidence_level": confidence_level,
        "individual_model_results": results,
        "features": text_features,
    }

    return final_result

# Build the Gradio web UI: a single multi-line text box in, the raw
# detection-result dict rendered as JSON out.
iface = gr.Interface(
    fn=detect_ai_text,
    inputs=gr.Textbox(lines=10, placeholder="粘贴要检测的文本..."),
    outputs=gr.JSON(),
    title="增强型AI文本检测API",
    description="多模型集成检测文本是否由AI生成",
    examples=[
        ["这是一段示例文本,用于测试AI文本检测功能。请输入至少50个字符的文本以获得准确的检测结果。"]
    ],
    # NOTE(review): allow_flagging was deprecated/renamed in newer Gradio
    # releases (flagging_mode in 4.x+) — confirm against the pinned version.
    allow_flagging="never"
)

# Start the Gradio app server.
iface.launch()