princhman committed on
Commit
b544f5d
·
1 Parent(s): 1066474

update the queue

Browse files
__pycache__/inference_svm_model.cpython-310.pyc DELETED
Binary file (960 Bytes)
 
__pycache__/mineru_single.cpython-310.pyc DELETED
Binary file (4.24 kB)
 
__pycache__/worker.cpython-310.pyc DELETED
Binary file (4.85 kB)
 
app.py CHANGED
@@ -42,7 +42,7 @@ async def process_pdf(
42
 
43
  channel.basic_publish(
44
  exchange="",
45
- routing_key="ml_server",
46
  body=json.dumps(input_json),
47
  properties=pika.BasicProperties(
48
  headers={"process": "topic_extraction"}
 
42
 
43
  channel.basic_publish(
44
  exchange="",
45
+ routing_key="gpu_server",
46
  body=json.dumps(input_json),
47
  properties=pika.BasicProperties(
48
  headers={"process": "topic_extraction"}
download_models_hf.py CHANGED
@@ -1,67 +1,66 @@
1
- import json
2
- import os
3
-
4
- import requests
5
- from huggingface_hub import snapshot_download
6
-
7
-
8
- def download_json(url):
9
- # 下载JSON文件
10
- response = requests.get(url)
11
- response.raise_for_status() # 检查请求是否成功
12
- return response.json()
13
-
14
-
15
- def download_and_modify_json(url, local_filename, modifications):
16
- if os.path.exists(local_filename):
17
- data = json.load(open(local_filename))
18
- config_version = data.get('config_version', '0.0.0')
19
- if config_version < '1.1.1':
20
- data = download_json(url)
21
- else:
22
- data = download_json(url)
23
-
24
- # 修改内容
25
- for key, value in modifications.items():
26
- data[key] = value
27
-
28
- # 保存修改后的内容
29
- with open(local_filename, 'w', encoding='utf-8') as f:
30
- json.dump(data, f, ensure_ascii=False, indent=4)
31
-
32
-
33
- if __name__ == '__main__':
34
-
35
- mineru_patterns = [
36
- "models/Layout/LayoutLMv3/*",
37
- "models/Layout/YOLO/*",
38
- "models/MFD/YOLO/*",
39
- "models/MFR/unimernet_small_2501/*",
40
- "models/TabRec/TableMaster/*",
41
- "models/TabRec/StructEqTable/*",
42
- ]
43
- model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)
44
-
45
- layoutreader_pattern = [
46
- "*.json",
47
- "*.safetensors",
48
- ]
49
- layoutreader_model_dir = snapshot_download('hantian/layoutreader', allow_patterns=layoutreader_pattern)
50
-
51
- model_dir = model_dir + '/models'
52
- print(f'model_dir is: {model_dir}')
53
- print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
54
-
55
- json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
56
- config_file_name = 'magic-pdf.json'
57
- home_dir = os.path.expanduser('~')
58
- config_file = os.path.join(home_dir, config_file_name)
59
-
60
- json_mods = {
61
- 'models-dir': model_dir,
62
- 'layoutreader-model-dir': layoutreader_model_dir,
63
- 'device': 'cuda'
64
- }
65
-
66
- download_and_modify_json(json_url, config_file, json_mods)
67
- print(f'The configuration file has been configured successfully, the path is: {config_file}')
 
1
+ import json
2
+ import os
3
+
4
+ import requests
5
+ from huggingface_hub import snapshot_download
6
+
7
+
8
+ def download_json(url):
9
+ # 下载JSON文件
10
+ response = requests.get(url)
11
+ response.raise_for_status() # 检查请求是否成功
12
+ return response.json()
13
+
14
+
15
+ def download_and_modify_json(url, local_filename, modifications):
16
+ if os.path.exists(local_filename):
17
+ data = json.load(open(local_filename))
18
+ config_version = data.get('config_version', '0.0.0')
19
+ if config_version < '1.1.1':
20
+ data = download_json(url)
21
+ else:
22
+ data = download_json(url)
23
+
24
+ # 修改内容
25
+ for key, value in modifications.items():
26
+ data[key] = value
27
+
28
+ # 保存修改后的内容
29
+ with open(local_filename, 'w', encoding='utf-8') as f:
30
+ json.dump(data, f, ensure_ascii=False, indent=4)
31
+
32
+
33
+ if __name__ == '__main__':
34
+
35
+ mineru_patterns = [
36
+ "models/Layout/LayoutLMv3/*",
37
+ "models/Layout/YOLO/*",
38
+ "models/MFD/YOLO/*",
39
+ "models/MFR/unimernet_small_2501/*",
40
+ "models/TabRec/TableMaster/*",
41
+ "models/TabRec/StructEqTable/*",
42
+ ]
43
+ model_dir = snapshot_download('opendatalab/PDF-Extract-Kit-1.0', allow_patterns=mineru_patterns)
44
+
45
+ layoutreader_pattern = [
46
+ "*.json",
47
+ "*.safetensors",
48
+ ]
49
+ layoutreader_model_dir = snapshot_download('hantian/layoutreader', allow_patterns=layoutreader_pattern)
50
+
51
+ model_dir = model_dir + '/models'
52
+ print(f'model_dir is: {model_dir}')
53
+ print(f'layoutreader_model_dir is: {layoutreader_model_dir}')
54
+
55
+ json_url = 'https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json'
56
+ config_file_name = 'magic-pdf.json'
57
+ home_dir = os.path.expanduser('~')
58
+ config_file = os.path.join(home_dir, config_file_name)
59
+
60
+ json_mods = {
61
+ 'models-dir': model_dir,
62
+ 'layoutreader-model-dir': layoutreader_model_dir,
63
+ }
64
+
65
+ download_and_modify_json(json_url, config_file, json_mods)
66
+ print(f'The configuration file has been configured successfully, the path is: {config_file}')