aaappp7878 committed on
Commit
2e3fe2e
·
verified ·
1 Parent(s): 7aaa31b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -29
app.py CHANGED
@@ -1,52 +1,139 @@
 
1
  import gradio as gr
2
  from transformers import pipeline
 
 
3
 
# Publicly available checkpoint intended specifically for detecting
# AI-generated text.
_DETECTOR_MODEL = "Xenova/distilbert-base-ai-generated-text-detection"
detector = pipeline("text-classification", model=_DETECTOR_MODEL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
def detect_ai_text(text):
    """Classify *text* with the module-level ``detector`` and summarise the result.

    Args:
        text: The text to check.

    Returns:
        ``{"error": ...}`` when *text* is empty or has fewer than 50
        characters after stripping.  Otherwise a dict with
        ``ai_probability``, ``features`` (from ``analyze_text_features``),
        ``confidence`` (the raw score of the top prediction) and ``label``.
    """
    too_short = not text or len(text.strip()) < 50
    if too_short:
        return {"error": "文本太短,无法可靠检测"}

    # Only the first prediction returned by the detector is used.
    top = detector(text)[0]
    label, score = top["label"], top["score"]

    # The score is taken as the AI probability when the label contains "ai";
    # any other label is treated as human-written and the score is flipped.
    ai_probability = score if "ai" in label.lower() else 1 - score

    return {
        "ai_probability": float(ai_probability),
        "features": analyze_text_features(text),
        "confidence": float(score),
        "label": label,
    }
33
-
def analyze_text_features(text):
    """Return simple surface statistics of *text*.

    Returns:
        A dict with ``length`` (character count), ``avg_word_length`` (mean
        length of the whitespace-separated tokens) and ``unique_words_ratio``
        (distinct lower-cased tokens divided by the token count).  Both
        ratios are 0 when the text contains no tokens.
    """
    tokens = text.split()
    denominator = max(1, len(tokens))  # avoids dividing by zero on blank text
    distinct_lowered = set(text.lower().split())
    return {
        "length": len(text),
        "avg_word_length": sum(map(len, tokens)) / denominator,
        "unique_words_ratio": len(distinct_lowered) / denominator,
    }
42
 
# Build the Gradio interface: a multi-line text box in, a JSON result out.
iface = gr.Interface(
    title="AI文本检测API",
    description="检测文本是否由AI生成",
    fn=detect_ai_text,
    inputs=gr.Textbox(placeholder="粘贴要检测的文本...", lines=10),
    outputs=gr.JSON(),
)

# Launch the interface.
iface.launch()
 
 
1
+ # app.py - multi-model ensemble system for text detection
2
  import gradio as gr
3
  from transformers import pipeline
4
+ import numpy as np
5
+ import re
6
 
# Detection models to load.  "weight" is the model's share in the weighted
# average computed by detect_ai_text; "detector" is filled in by the loop below.
models = {
    key: {"name": name, "detector": None, "weight": weight}
    for key, name, weight in (
        ("model1", "Xenova/distilbert-base-ai-generated-text-detection", 0.4),
        ("model2", "Hello-SimpleAI/chatgpt-detector-roberta", 0.3),
        ("model3", "roberta-base-openai-detector", 0.3),
    )
}

# Initialise the models.  A model that fails to load is reported on stdout and
# keeps ``detector = None``; loading continues with the remaining models.
for entry in models.values():
    model_name = entry["name"]
    try:
        entry["detector"] = pipeline("text-classification", model=model_name)
        print(f"成功加载模型: {model_name}")
    except Exception as exc:
        print(f"加载模型 {model_name} 失败: {exc}")
        entry["detector"] = None
34
+
def analyze_text_features(text):
    """Compute simple surface statistics of *text*.

    Words are whitespace-separated tokens; sentences are the non-blank
    segments between runs of ``.``, ``!`` and ``?``.

    Args:
        text: The string to analyse.

    Returns:
        A dict with the keys ``length``, ``word_count``, ``avg_word_length``,
        ``unique_words_ratio`` (case-insensitive), ``sentence_count``,
        ``avg_sentence_length`` (words per sentence), ``lexical_diversity``
        (case-sensitive) and ``punctuation_ratio``.  All keys are always
        present; a ratio is 0 when its denominator would otherwise be zero.
    """
    words = text.split()
    word_total = max(1, len(words))  # guards the divisions on blank text

    # re.split leaves empty or blank pieces (for example after a trailing
    # "."); those are not sentences and must not be counted.
    sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
    sentence_total = max(1, len(sentences))

    punctuation_count = sum(1 for char in text if char in ",.!?;:\"'()[]{}")

    return {
        "length": len(text),
        "word_count": len(words),
        "avg_word_length": sum(len(word) for word in words) / word_total,
        "unique_words_ratio": len(set(text.lower().split())) / word_total,
        "sentence_count": len(sentences),
        "avg_sentence_length": sum(len(s.split()) for s in sentences) / sentence_total,
        "lexical_diversity": len(set(words)) / word_total,
        "punctuation_ratio": punctuation_count / max(1, len(text)),
    }
58
 
# Substrings that mark a classifier label as the "AI-generated" class.
# "fake" is included because roberta-base-openai-detector reports Real / Fake
# labels according to its model card.
_AI_LABEL_MARKERS = ("ai", "chatgpt", "generated", "fake")


def _label_means_ai(label):
    """Return True when *label* contains one of the AI-class markers."""
    lowered = label.lower()
    return any(marker in lowered for marker in _AI_LABEL_MARKERS)


def detect_ai_text(text):
    """Estimate the probability that *text* is AI-generated using all loaded models.

    Every entry of the module-level ``models`` dict whose ``detector`` is not
    None classifies the text.  The per-model AI probabilities are combined
    into a weighted average, normalised by the weights of the models that
    actually produced a prediction.

    Args:
        text: The text to check.

    Returns:
        ``{"error": ...}`` when *text* is empty or has fewer than 50
        characters after stripping.  ``{"error": ..., "individual_model_results": ...}``
        when no model produced a prediction.  Otherwise a dict with
        ``ai_probability``, ``confidence_level``, ``individual_model_results``
        and ``features`` (from ``analyze_text_features``).
    """
    if not text or len(text.strip()) < 50:
        return {"error": "文本太短,无法可靠检测"}

    results = {}
    weighted_sum = 0.0
    weight_used = 0.0  # total weight of the models that returned a prediction

    for key, model_info in models.items():
        detector = model_info["detector"]
        if detector is None:
            continue

        try:
            # truncation=True is forwarded to the tokenizer so that inputs
            # longer than the model's maximum length are cut, not rejected.
            top = detector(text, truncation=True)[0]
            label = top["label"]
            score = float(top["score"])
        except Exception as e:
            # Best effort: record the failure and continue with the others.
            results[key] = {
                "model_name": model_info["name"],
                "error": str(e),
            }
            continue

        # The score belongs to the predicted label; flip it when that label
        # is not the AI class.
        ai_probability = score if _label_means_ai(label) else 1 - score

        results[key] = {
            "model_name": model_info["name"],
            "ai_probability": ai_probability,
            "label": label,
            "score": score,
        }
        weighted_sum += ai_probability * model_info["weight"]
        weight_used += model_info["weight"]

    if weight_used <= 0:
        # Without any prediction a probability of 0 would wrongly read as
        # "very likely human", so report an error instead.
        return {
            "error": "没有可用的检测模型,无法完成检测",
            "individual_model_results": results,
        }

    # Normalise by the weight that actually contributed so that missing or
    # failing models do not drag the average towards 0.
    final_ai_probability = weighted_sum / weight_used

    if final_ai_probability > 0.7:
        confidence_level = "高概率AI生成"
    elif final_ai_probability < 0.3:
        confidence_level = "高概率人类创作"
    else:
        confidence_level = "无法确定"

    return {
        "ai_probability": final_ai_probability,
        "confidence_level": confidence_level,
        "individual_model_results": results,
        "features": analyze_text_features(text),
    }
124
 
# Build the Gradio interface: a multi-line text box in, a JSON report out.
iface = gr.Interface(
    title="增强型AI文本检测API",
    description="多模型集成检测文本是否由AI生成",
    fn=detect_ai_text,
    inputs=gr.Textbox(placeholder="粘贴要检测的文本...", lines=10),
    outputs=gr.JSON(),
    examples=[
        ["这是一段示例文本,用于测试AI文本检测功能。请输入至少50个字符的文本以获得准确的检测结果。"],
    ],
    allow_flagging="never",
)

# Launch the interface.
iface.launch()
139
+