Arsenii11 committed on
Commit
5e981a8
·
1 Parent(s): 16bef89

Fix missing svm_model: pass it to ImageWriter explicitly

Browse files
Files changed (1) hide show
  1. mineru_single.py +2 -2
mineru_single.py CHANGED
@@ -88,7 +88,7 @@ class Processor:
88
  logger.info("doc_analyze complete for key='%s'. Started extracting images...", key)
89
  # Classify images and remove irrelevant ones
90
  # image_writer = ImageWriter(self.s3_writer)
91
- image_writer = ImageWriter(self.s3_writer, f"{self.prefix}{key}/") # Pass base path to ImageWriter
92
  pipe_result = inference.pipe_ocr_mode(image_writer, lang=self.language)
93
  logger.info("OCR pipeline completed for key='%s'.", key)
94
  md_content = pipe_result.get_markdown(f"{self.prefix}{key}/")
@@ -128,7 +128,7 @@ class ImageWriter(DataWriter):
128
  Receives each extracted image. Classifies it, uploads if relevant, or flags
129
  it for removal if irrelevant.
130
  """
131
- def __init__(self, s3_writer: s3Writer, base_path: str):
132
  self.s3_writer = s3_writer
133
  self.base_path = base_path
134
  self.svm_model = svm_model
 
88
  logger.info("doc_analyze complete for key='%s'. Started extracting images...", key)
89
  # Classify images and remove irrelevant ones
90
  # image_writer = ImageWriter(self.s3_writer)
91
+ image_writer = ImageWriter(self.s3_writer, f"{self.prefix}{key}/", self.svm_model) # Pass base path to ImageWriter
92
  pipe_result = inference.pipe_ocr_mode(image_writer, lang=self.language)
93
  logger.info("OCR pipeline completed for key='%s'.", key)
94
  md_content = pipe_result.get_markdown(f"{self.prefix}{key}/")
 
128
  Receives each extracted image. Classifies it, uploads if relevant, or flags
129
  it for removal if irrelevant.
130
  """
131
+ def __init__(self, s3_writer: s3Writer, base_path: str, svm_model: SVMModel):
132
  self.s3_writer = s3_writer
133
  self.base_path = base_path
134
  self.svm_model = svm_model