Yiming Qian
committed
Update README.md

README.md CHANGED
@@ -8,8 +8,9 @@ pipeline_tag: feature-extraction

This model is based on quantized LLAMA 3 8B. It is designed to parse PDF documents into Markdown and provides an initial parsing service for a RAG system.

Please use the following code to parse a PDF.

'''
import pymupdf
from bs4 import BeautifulSoup
import pickle
import torch
@@ -20,7 +21,7 @@ torch.random.manual_seed(0)

model_kwargs = dict(
    use_cache=False,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    load_in_4bit=True
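The diff shows only the model_kwargs fragment, not the loading call itself. Below is a minimal sketch of how such kwargs are typically passed to transformers; the repository id is a placeholder, and the use of AutoModelForCausalLM, AutoTokenizer, and pipeline is an assumption, since the README's actual loading code is not part of this hunk.

```python
# Minimal sketch (assumptions: placeholder repo id; AutoModelForCausalLM + pipeline
# are not shown in the diff and may differ from the README's actual code).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

torch.random.manual_seed(0)

model_id = "user/quantized-llama3-8b-pdf2md"  # placeholder, replace with this repo's id

model_kwargs = dict(
    use_cache=False,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",  # requires the flash-attn package
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    load_in_4bit=True,                        # 4-bit quantization via bitsandbytes
)

model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
```

Note that flash_attention_2 and load_in_4bit require the flash-attn and bitsandbytes packages, and device_map="cuda" assumes a CUDA GPU is available.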
@@ -89,7 +90,7 @@ generation_args = {

    "do_sample": False,
}

filename = '2023071000529.pdf'
elements = []
with pymupdf.open(filename) as doc:
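The last hunk stops at the pymupdf.open context manager. Since the snippet imports BeautifulSoup and pickle, a plausible continuation is sketched below: extract each page, ask the model for Markdown, and pickle the collected elements. The get_text("html") call, the prompt wording, the use of the `pipe` object from the sketch above, and the output file name are all assumptions rather than the README's actual code.

```python
# Hypothetical continuation of the snippet (not the README's actual loop).
# Assumes `pipe` from the sketch above and `generation_args`, `filename`,
# `elements` as defined in the README excerpt.
with pymupdf.open(filename) as doc:
    for page in doc:
        html = page.get_text("html")                      # per-page layout as HTML
        text = BeautifulSoup(html, "html.parser").get_text("\n")
        prompt = "Convert the following PDF page content to Markdown:\n\n" + text
        output = pipe(prompt, **generation_args)
        elements.append(output[0]["generated_text"])      # collected Markdown chunks

# Persist the parsed pages so the RAG system can index them later.
with open(filename + ".pkl", "wb") as f:
    pickle.dump(elements, f)
```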
|