Update app.py
app.py
CHANGED
@@ -1,52 +1,139 @@
import gradio as gr
from transformers import pipeline

def detect_ai_text(text):
    if not text or len(text.strip()) < 50:
        return {"error": "Text too short for reliable detection"}

-    score = result[0]["score"]
-        ai_probability = score
-    else:  # human-written
-        ai_probability = 1 - score
    }

-def analyze_text_features(text):
-    # Simple text feature analysis
-    features = {}
-    features["length"] = len(text)
-    features["avg_word_length"] = sum(len(word) for word in text.split()) / max(1, len(text.split()))
-    features["unique_words_ratio"] = len(set(text.lower().split())) / max(1, len(text.split()))
-    return

# Create the Gradio interface
iface = gr.Interface(
    fn=detect_ai_text,
    inputs=gr.Textbox(lines=10, placeholder="Paste the text to check..."),
    outputs=gr.JSON(),
-    title="AI Text Detection API",
-    description="
)

iface.launch()
+# app.py - multi-model ensemble system for AI text detection
import gradio as gr
from transformers import pipeline
+import numpy as np
+import re

+# Load several detection models
+models = {
+    "model1": {
+        "name": "Xenova/distilbert-base-ai-generated-text-detection",
+        "detector": None,
+        "weight": 0.4
+    },
+    "model2": {
+        "name": "Hello-SimpleAI/chatgpt-detector-roberta",
+        "detector": None,
+        "weight": 0.3
+    },
+    "model3": {
+        "name": "roberta-base-openai-detector",
+        "detector": None,
+        "weight": 0.3
+    }
+}
+
+# Initialize the models
+for key in models:
+    try:
+        models[key]["detector"] = pipeline("text-classification", model=models[key]["name"])
+        print(f"Loaded model: {models[key]['name']}")
+    except Exception as e:
+        print(f"Failed to load model {models[key]['name']}: {str(e)}")
+        models[key]["detector"] = None
+
+def analyze_text_features(text):
+    # Basic text feature analysis
+    features = {}
+    features["length"] = len(text)
+    words = text.split()
+    features["word_count"] = len(words)
+    features["avg_word_length"] = sum(len(word) for word in words) / max(1, len(words))
+    features["unique_words_ratio"] = len(set(text.lower().split())) / max(1, len(words))
+
+    # Sentence analysis
+    sentences = re.split(r'[.!?]+', text)
+    features["sentence_count"] = len(sentences)
+    features["avg_sentence_length"] = sum(len(s.split()) for s in sentences) / max(1, len(sentences))
+
+    # Lexical diversity
+    if len(words) > 0:
+        features["lexical_diversity"] = len(set(words)) / len(words)
+
+    # Punctuation ratio
+    punctuation_count = sum(1 for char in text if char in ",.!?;:\"'()[]{}")
+    features["punctuation_ratio"] = punctuation_count / max(1, len(text))
+
+    return features

def detect_ai_text(text):
    if not text or len(text.strip()) < 50:
        return {"error": "Text too short for reliable detection"}

+    results = {}
+    valid_models = 0
+    weighted_ai_probability = 0
+
+    # Run each model
+    for key, model_info in models.items():
+        if model_info["detector"] is not None:
+            try:
+                result = model_info["detector"](text)
+
+                # Extract the prediction
+                label = result[0]["label"]
+                score = result[0]["score"]
+
+                # Map the label to an AI-generated probability
+                # ("fake" covers detectors such as roberta-base-openai-detector,
+                # which label machine-generated text "Fake")
+                if any(tag in label.lower() for tag in ("ai", "chatgpt", "generated", "fake")):
+                    ai_probability = score
+                else:
+                    ai_probability = 1 - score
+
+                # Record the per-model result
+                results[key] = {
+                    "model_name": model_info["name"],
+                    "ai_probability": ai_probability,
+                    "label": label,
+                    "score": score
+                }
+
+                # Accumulate the weighted probability
+                weighted_ai_probability += ai_probability * model_info["weight"]
+                valid_models += 1
+
+            except Exception as e:
+                results[key] = {
+                    "model_name": model_info["name"],
+                    "error": str(e)
+                }

+    # Normalize by the total weight of the models that actually loaded,
+    # so a failed model does not drag the score toward "human"
+    loaded_weight = sum(m["weight"] for m in models.values() if m["detector"] is not None)
+    final_ai_probability = weighted_ai_probability / loaded_weight if loaded_weight > 0 else 0.0

+    # Analyze text features
+    text_features = analyze_text_features(text)

+    # Determine the confidence level
+    if final_ai_probability > 0.7:
+        confidence_level = "Likely AI-generated"
+    elif final_ai_probability < 0.3:
+        confidence_level = "Likely human-written"
+    else:
+        confidence_level = "Inconclusive"

+    # Build the final result
+    final_result = {
+        "ai_probability": final_ai_probability,
+        "confidence_level": confidence_level,
+        "individual_model_results": results,
+        "features": text_features
    }

+    return final_result

# Create the Gradio interface
iface = gr.Interface(
    fn=detect_ai_text,
    inputs=gr.Textbox(lines=10, placeholder="Paste the text to check..."),
    outputs=gr.JSON(),
+    title="Enhanced AI Text Detection API",
+    description="Multi-model ensemble that detects whether text is AI-generated",
+    examples=[
+        ["This is a sample passage used to test the AI text detection feature. Enter at least 50 characters of text to get a reliable result."]
+    ],
+    allow_flagging="never"
)

iface.launch()
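The normalization step above matters when a model fails to load: the accumulated weighted probability is divided by the weight that was actually applied rather than by a fixed 1.0, which would otherwise bias every score toward "human". A minimal sketch of the arithmetic, with hypothetical per-model probabilities and model3 assumed to have failed:

weights = {"model1": 0.4, "model2": 0.3}              # model3 failed to load
probs = {"model1": 0.9, "model2": 0.6}                # hypothetical AI probabilities
weighted = sum(probs[k] * weights[k] for k in probs)  # 0.9*0.4 + 0.6*0.3 = 0.54
final = weighted / sum(weights.values())              # 0.54 / 0.7 ≈ 0.77 -> "Likely AI-generated"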
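The Space's dependencies are not part of this diff; as a sketch, a requirements.txt along these lines should cover the code above (the package set follows the imports, but exact pins are an assumption, not taken from the repo):

# requirements.txt (assumed)
gradio
transformers
torch      # backend for the transformers pipelines
numpy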
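Because the interface returns JSON, the deployed Space can also be queried programmatically with gradio_client; a minimal sketch, assuming a hypothetical Space id your-username/ai-text-detector:

from gradio_client import Client

client = Client("your-username/ai-text-detector")  # hypothetical Space id
result = client.predict(
    "Enter at least fifty characters of text here so the detector can produce a meaningful score.",
    api_name="/predict",  # default endpoint name for a gr.Interface
)
print(result)  # dict with ai_probability, confidence_level, and per-model results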