minar09 committed on
Commit
5307b4f
·
verified ·
1 Parent(s): bb91dd1

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +11 -5
main.py CHANGED
@@ -30,14 +30,20 @@ class PDFProcessor:
30
  self.mineru = Mineru()
31
  self.emb_model = SentenceTransformer('all-MiniLM-L6-v2')
32
 
33
- # Initialize quantized LLM (using deepseek-1.3b)
34
- self.llm = Llama(
35
- model_path="models/deepseek-1.3b-q5_k_m.gguf",
 
 
 
 
 
36
  n_ctx=2048,
37
  n_threads=os.cpu_count() - 1,
38
- n_gpu_layers=35 if os.getenv('USE_GPU') else 0
 
39
  )
40
-
41
  def extract_layout(self, pdf_path: str) -> List[Layout]:
42
  """Extract structured layout using MinerU"""
43
  return self.mineru.process_pdf(pdf_path)
 
30
  self.mineru = Mineru()
31
  self.emb_model = SentenceTransformer('all-MiniLM-L6-v2')
32
 
33
+ # Initialize LLM with automatic download
34
+ self.llm = self._initialize_llm()
35
+
36
+ def _initialize_llm(self):
37
+ """Initialize LLM with automatic download if needed"""
38
+ return Llama.from_pretrained(
39
+ repo_id="TheBloke/deepseek-1.3B-GGUF",
40
+ filename="deepseek-1.3b.Q5_K_M.gguf",
41
  n_ctx=2048,
42
  n_threads=os.cpu_count() - 1,
43
+ n_gpu_layers=35 if os.getenv('USE_GPU') else 0,
44
+ verbose=False
45
  )
46
+
47
  def extract_layout(self, pdf_path: str) -> List[Layout]:
48
  """Extract structured layout using MinerU"""
49
  return self.mineru.process_pdf(pdf_path)