Spaces:
Build error
Build error
Update main.py
Browse files
main.py
CHANGED
@@ -30,14 +30,20 @@ class PDFProcessor:
|
|
30 |
self.mineru = Mineru()
|
31 |
self.emb_model = SentenceTransformer('all-MiniLM-L6-v2')
|
32 |
|
33 |
-
# Initialize
|
34 |
-
self.llm =
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
36 |
n_ctx=2048,
|
37 |
n_threads=os.cpu_count() - 1,
|
38 |
-
n_gpu_layers=35 if os.getenv('USE_GPU') else 0
|
|
|
39 |
)
|
40 |
-
|
41 |
def extract_layout(self, pdf_path: str) -> List[Layout]:
|
42 |
"""Extract structured layout using MinerU"""
|
43 |
return self.mineru.process_pdf(pdf_path)
|
|
|
30 |
self.mineru = Mineru()
|
31 |
self.emb_model = SentenceTransformer('all-MiniLM-L6-v2')
|
32 |
|
33 |
+
# Initialize LLM with automatic download
|
34 |
+
self.llm = self._initialize_llm()
|
35 |
+
|
36 |
+
def _initialize_llm(self):
    """Initialize LLM with automatic download if needed.

    Builds the model through ``Llama.from_pretrained`` using a fixed
    repository id and GGUF filename, a 2048-token context window, and
    ``verbose=False``.

    Returns:
        The object returned by ``Llama.from_pretrained``.
    """
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # the previous `os.cpu_count() - 1` raised TypeError in that case and
    # produced 0 on a single-core host. Leave one core free when possible,
    # but never ask for fewer than one thread.
    cpu_total = os.cpu_count() or 1
    worker_threads = max(1, cpu_total - 1)

    # NOTE: only the truthiness of the string is checked, so any non-empty
    # USE_GPU value (including "0" or "false") turns GPU offload on.
    gpu_layers = 35 if os.getenv('USE_GPU') else 0

    # NOTE(review): confirm this repo_id/filename pair exists on the Hub;
    # a wrong name would make the download fail at startup.
    return Llama.from_pretrained(
        repo_id="TheBloke/deepseek-1.3B-GGUF",
        filename="deepseek-1.3b.Q5_K_M.gguf",
        n_ctx=2048,
        n_threads=worker_threads,
        n_gpu_layers=gpu_layers,
        verbose=False,
    )
|
46 |
+
|
47 |
def extract_layout(self, pdf_path: str) -> List[Layout]:
    """Run the MinerU processor on ``pdf_path`` and return its result.

    Args:
        pdf_path: Path of the PDF, passed unchanged to
            ``self.mineru.process_pdf``.

    Returns:
        Whatever ``self.mineru.process_pdf`` returns for this path.
    """
    layouts = self.mineru.process_pdf(pdf_path)
    return layouts
|