ChingCL committed on
Commit
5e200b9
·
verified ·
1 Parent(s): 4d3db91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -1
app.py CHANGED
@@ -1,4 +1,90 @@
1
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import pandas as pd
3
  import re
4
  import json
 
1
import json
import re

import gradio as gr
import pandas as pd
5
+
6
# Extract the text portion from a JSON-formatted cell.
def extract_text_from_json(text):
    """Return the text payload embedded in a JSON string, or *text* itself.

    If *text* parses as a JSON object, the first string found at
    ``data['question']['content']`` or ``data['content']`` (in that order)
    is returned. In every other case (not JSON, not an object, the keys are
    missing, or the value is not a string) the original *text* is returned
    unchanged, so callers always get something they can run a regex over.

    Args:
        text: The raw cell content as a string.

    Returns:
        The extracted content string, or *text* when nothing usable is found.
    """
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # Plain (non-JSON) cell: check the raw text as-is.
        return text

    if not isinstance(data, dict):
        return text

    # Pull out whichever text field the JSON object carries. Each lookup is
    # guarded so a malformed object falls through instead of raising.
    question = data.get('question')
    if isinstance(question, dict) and isinstance(question.get('content'), str):
        return question['content']

    content = data.get('content')
    if isinstance(content, str):
        return content

    return text
19
+
20
# Check 1: is there a space before and after each `$` token?
def check_spacing_around_dollar(df):
    """Report cells whose `$` tokens are not set off by spaces.

    A token is either a ``$`` followed by a run of non-whitespace characters
    or a run of non-whitespace characters ending in ``$``. A cell is reported
    when some token has a character immediately before it that is not a
    space, or a character immediately after it that is not a space. Tokens
    touching the start or end of the text are accepted on that side.

    Each offending cell is reported once, even if several tokens in it fail.

    Args:
        df: DataFrame whose cells are checked; every cell is converted with
            ``str`` and passed through ``extract_text_from_json`` first.

    Returns:
        A list of messages, one per offending cell, containing the row
        number (index label + 1), the column name and the checked text.
    """
    # Compiled once instead of on every cell.
    token_re = re.compile(r'(\$\S+|\S+\$)')
    errors = []
    # NOTE(review): `i+1` assumes the DataFrame index holds integers
    # (e.g. the default RangeIndex) -- confirm against the loader.
    for i, row in df.iterrows():
        for col in df.columns:
            text = extract_text_from_json(str(row[col]))
            for match in token_re.finditer(text):
                start, end = match.span()
                bad_before = start > 0 and text[start - 1] != ' '
                bad_after = end < len(text) and text[end] != ' '
                if bad_before or bad_after:
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
                    # The message is identical for every match in this
                    # cell, so one report is enough.
                    break
    return errors
31
+
32
# Check 2: spacing around text enclosed between `$` symbols.
def check_spacing_between_dollars(df):
    """Report cells where a ``$...$`` opening is glued to preceding text.

    The pattern matches an opening ``$`` plus the shortest run of
    non-whitespace characters that is directly followed by another ``$``.
    A cell is reported when the character after the match is not a space
    and the character before the match exists and is not a space.

    A match at the very start of the text has no preceding character and is
    therefore not reported (the previous code read ``text[-1]``, i.e. the
    last character of the cell, in that situation).

    Each offending cell is reported once.

    Args:
        df: DataFrame whose cells are checked; every cell is converted with
            ``str`` and passed through ``extract_text_from_json`` first.

    Returns:
        A list of messages, one per offending cell, containing the row
        number (index label + 1), the column name and the checked text.
    """
    # Compiled once instead of on every cell.
    pair_re = re.compile(r'\$\S+?(?=\$)')
    errors = []
    # NOTE(review): `i+1` assumes the DataFrame index holds integers
    # (e.g. the default RangeIndex) -- confirm against the loader.
    for i, row in df.iterrows():
        for col in df.columns:
            text = extract_text_from_json(str(row[col]))
            for match in pair_re.finditer(text):
                start, end = match.span()
                # The lookahead guarantees text[end] exists: it is the
                # closing `$`, so this half of the test is always true.
                # NOTE(review): kept to preserve the original condition;
                # confirm whether a different position was intended.
                no_space_after = text[end] != ' '
                # Guard against start == 0, where start - 1 would wrap
                # around to the end of the string.
                no_space_before = start > 0 and text[start - 1] != ' '
                if no_space_after and no_space_before:
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
                    # Identical message for every match in this cell.
                    break
    return errors
43
+
44
# Check 3: is every number wrapped in `$` symbols?
def check_numbers_surrounded_by_dollar(df):
    """Report cells containing a number not directly wrapped in ``$``.

    A number is a run of digits with an optional decimal part
    (``12`` or ``3.14``), delimited by word boundaries. The cell is reported
    when some number is not immediately preceded by ``$`` and immediately
    followed by ``$``. Treating the decimal part as belonging to the number
    keeps ``$3.14$`` from being flagged as two unwrapped integers.

    Each offending cell is reported once.

    Args:
        df: DataFrame whose cells are checked; every cell is converted with
            ``str`` and passed through ``extract_text_from_json`` first.

    Returns:
        A list of messages, one per offending cell, containing the row
        number (index label + 1), the column name and the checked text.
    """
    # Compiled once instead of on every cell.
    number_re = re.compile(r'\b\d+(?:\.\d+)?\b')
    errors = []
    # NOTE(review): `i+1` assumes the DataFrame index holds integers
    # (e.g. the default RangeIndex) -- confirm against the loader.
    for i, row in df.iterrows():
        for col in df.columns:
            text = extract_text_from_json(str(row[col]))
            for match in number_re.finditer(text):
                start, end = match.span()
                dollar_before = start > 0 and text[start - 1] == '$'
                dollar_after = end < len(text) and text[end] == '$'
                if not (dollar_before and dollar_after):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
                    # Identical message for every match in this cell.
                    break
    return errors
55
+
56
# Load the uploaded file and run the checks on it.
def process_file(file):
    """Read an uploaded CSV/XLSX file and run all three format checks.

    Args:
        file: Either a filesystem path given as ``str`` or an object whose
            ``name`` attribute holds the path. ``None`` (nothing uploaded)
            is treated like an unsupported file.

    Returns:
        A dict mapping each check's label to its list of error messages, or
        the plain message string when the file is missing or its extension
        is neither ``.csv`` nor ``.xlsx`` (compared case-insensitively).
    """
    unsupported = "只支持 CSV 和 XLSX 檔案"
    if file is None:
        return unsupported

    # Accept both a bare path string and a file-like wrapper with `.name`.
    path = file if isinstance(file, str) else file.name
    lowered = path.lower()  # so `.CSV` / `.XLSX` are recognised too

    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith('.xlsx'):
        df = pd.read_excel(path)
    else:
        # NOTE(review): this string is sent to a gr.JSON output, which may
        # try to parse string values as JSON -- confirm it renders properly.
        return unsupported

    # Run the checks.
    errors1 = check_spacing_around_dollar(df)
    errors2 = check_spacing_between_dollars(df)
    errors3 = check_numbers_surrounded_by_dollar(df)

    return {
        "第一個檢查": errors1,
        "第二個檢查": errors2,
        "第三個檢查": errors3
    }
75
+
76
# Gradio interface: a file upload feeds process_file, and the returned
# report is shown in a JSON panel.
_upload_input = gr.File(label="上傳 CSV 或 XLSX 檔案")
_report_output = gr.JSON(label="檢查結果")

iface = gr.Interface(
    fn=process_file,
    inputs=_upload_input,
    outputs=_report_output,
    title="校對系統",
    description="這個系統會檢查 CSV 或 XLSX 檔案中的格式錯誤,包括 $ 符號和數字的空格錯誤。",
)

# Launch the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
87
+
88
  import pandas as pd
89
  import re
90
  import json