Delete app.py
Browse files
app.py
DELETED
@@ -1,78 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import pandas as pd
|
3 |
-
import numpy as np
|
4 |
-
from transformers import pipeline
|
5 |
-
from sklearn.impute import SimpleImputer
|
6 |
-
from sklearn.ensemble import IsolationForest
|
7 |
-
import subprocess
|
8 |
-
import sys
|
9 |
-
|
10 |
-
# Make sure PyTorch and TensorFlow are importable; any that is missing is
# installed with pip into the running interpreter's environment.
# NOTE(review): `transformers` is imported before this check runs, so a
# backend installed here may only be picked up after a restart — confirm.
def _pip_install(package):
    """Install *package* with pip using the current Python executable."""
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


try:
    import torch
except ImportError:
    print("PyTorch غير مثبت، سيتم تثبيته الآن.")
    _pip_install("torch")

try:
    import tensorflow
except ImportError:
    print("TensorFlow غير مثبت، سيتم تثبيته الآن.")
    _pip_install("tensorflow")

# Load the summarization model from Hugging Face.
summarizer = pipeline("summarization", model="t5-small")
|
25 |
-
|
26 |
-
def analyze_data_quality(file):
    """Build a short data-quality report for an uploaded CSV file.

    The report counts missing values per column and the number of rows an
    Isolation Forest flags as outliers among the numeric columns. The report
    text is then passed through the module-level ``summarizer`` pipeline.

    Args:
        file: The uploaded CSV, either an object exposing its path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        The summarized report text, or a short notice when no file was given.
    """
    if file is None:
        return "⚠️ يرجى رفع ملف CSV أولاً."

    # Accept both a file wrapper (path in ``.name``) and a bare path string.
    df = pd.read_csv(getattr(file, "name", file))

    # Missing values per column; keep only the columns that have any.
    missing_values = df.isnull().sum()
    missing_columns = missing_values[missing_values > 0]

    # Outlier detection runs on numeric columns only. Gaps are filled with the
    # column mean first (the same strategy clean_data uses) so that missing
    # values do not reach the estimator; all-NaN columns have no mean and are
    # left out.
    numeric = df.select_dtypes(include=[np.number]).dropna(axis=1, how="all")
    numeric = numeric.fillna(numeric.mean())
    if numeric.empty:
        # No numeric columns or no rows: nothing to fit the forest on.
        outlier_count = 0
    else:
        clf = IsolationForest(contamination=0.05, random_state=42)
        outlier_count = int((clf.fit_predict(numeric) == -1).sum())

    # Assemble the report.
    report = "🔍 تحليل جودة البيانات:\n\n"
    report += f"📌 عدد القيم الناقصة: {missing_values.sum()}\n"
    report += f"📌 عدد القيم الشاذة: {outlier_count}\n"
    # Test emptiness on the Series itself: to_string() on an empty Series is
    # a non-empty placeholder, so it cannot be used as the "no gaps" check.
    if missing_columns.empty:
        report += "\n✅ لا توجد قيم ناقصة."
    else:
        report += "\n📊 تفاصيل القيم الناقصة:\n" + missing_columns.to_string()

    # NOTE(review): the report is Arabic while the summarizer is loaded from
    # "t5-small" — confirm the model produces useful output for this text.
    summary = summarizer(report, max_length=100, do_sample=False)[0]['summary_text']

    return summary
|
47 |
-
|
48 |
-
def clean_data(file):
    """Impute missing numeric values, drop outlier rows, and save the result.

    Numeric columns are mean-imputed, then an Isolation Forest is fitted on
    them and every row it labels as an outlier (-1) is removed. The cleaned
    frame is written to ``cleaned_data.csv`` in the working directory.

    Args:
        file: The uploaded CSV, either an object exposing its path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        The path of the written CSV file, or ``None`` when no file was given.
    """
    if file is None:
        return None

    # Accept both a file wrapper (path in ``.name``) and a bare path string.
    df = pd.read_csv(getattr(file, "name", file))

    # Only numeric columns with at least one observed value can be
    # mean-imputed. SimpleImputer discards all-NaN columns, which would make
    # the assignment below fail on a shape mismatch.
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    fillable = [col for col in numeric_cols if df[col].notna().any()]

    if fillable:
        # Fill missing values with the column mean.
        imputer = SimpleImputer(strategy="mean")
        df[fillable] = imputer.fit_transform(df[fillable])

        # Remove the rows flagged as outliers.
        clf = IsolationForest(contamination=0.05, random_state=42)
        outliers = clf.fit_predict(df[fillable])
        df_cleaned = df[outliers != -1]
    else:
        # Nothing numeric to impute or to fit on: keep the data unchanged.
        df_cleaned = df

    cleaned_file = "cleaned_data.csv"
    df_cleaned.to_csv(cleaned_file, index=False)
    return cleaned_file
|
63 |
-
|
64 |
-
# Build the Gradio interface.
with gr.Blocks() as app:
    gr.Markdown("## 🛠️ محلل جودة البيانات")

    # Upload widget plus the two result widgets.
    csv_upload = gr.File(label="📂 رفع ملف CSV")
    report_box = gr.Textbox(label="📊 تقرير جودة البيانات")
    cleaned_download = gr.File(label="✅ تحميل البيانات المنظفة")

    analyze_btn = gr.Button("🔍 تحليل الجودة")
    clean_btn = gr.Button("🧹 تنظيف البيانات")

    # Each button feeds the uploaded file to its handler and shows the result.
    analyze_btn.click(
        fn=analyze_data_quality,
        inputs=csv_upload,
        outputs=report_box,
    )
    clean_btn.click(
        fn=clean_data,
        inputs=csv_upload,
        outputs=cleaned_download,
    )

app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|