Spaces:
Sleeping
Sleeping
Upload st_data_parser.py
Browse files- st_data_parser.py +16 -0
st_data_parser.py
CHANGED
@@ -4,6 +4,8 @@
|
|
4 |
2. 目前docx模块在huggingface的python3.10报错。暂时不支持docx文件。
|
5 |
|
6 |
"""
|
|
|
|
|
7 |
# -*- coding: utf-8 -*-
|
8 |
import numpy as np
|
9 |
import pandas as pd
|
@@ -72,6 +74,20 @@ def parser(file):
|
|
72 |
df = pd.read_json(json_file)
|
73 |
file_content = df.to_string()
|
74 |
# print('file_content:', file_content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
return file_content
|
77 |
|
|
|
4 |
2. 目前docx模块在huggingface的python3.10报错。暂时不支持docx文件。
|
5 |
|
6 |
"""
|
7 |
+
##TODO: 完成txt文件的上传功能。
|
8 |
+
|
9 |
# -*- coding: utf-8 -*-
|
10 |
import numpy as np
|
11 |
import pandas as pd
|
|
|
74 |
df = pd.read_json(json_file)
|
75 |
file_content = df.to_string()
|
76 |
# print('file_content:', file_content)
|
77 |
+
|
78 |
+
elif '.txt' in file.name:
|
79 |
+
print('txt file detected')
|
80 |
+
|
81 |
+
### 本质上txt文件和json文件的处理方式是一样的。
|
82 |
+
txt_file = file.getvalue() ## 在streamlit中获得上传文件的txt文件内容。这里不能用file.read().
|
83 |
+
# txt_file = txt_file.decode('utf-8') ### working.
|
84 |
+
txt_file = txt_file.encode('utf-8')
|
85 |
+
|
86 |
+
## 保障是utf-8的格式
|
87 |
+
# with open(file.name, 'r', encoding='utf-8') as f:
|
88 |
+
# txt_file = f.read()
|
89 |
+
|
90 |
+
file_content = txt_file
|
91 |
|
92 |
return file_content
|
93 |
|