parasmech committed on
Commit
3fe28db
·
verified ·
1 Parent(s): 408dfd4

Update backend.py

Browse files
Files changed (1) hide show
  1. backend.py +12 -2
backend.py CHANGED
@@ -52,10 +52,20 @@ class InvoicePipeline:
52
  def _get_raw_text_from_pdf(self, path:str) -> str:
53
  text = ""
54
  pdf_reader = PdfReader(path)
55
- for page in pdf_reader:
56
  text += page.extract_text()
57
  return text
58
 
59
  def _extract_data_from_llm(self, raw_data:str) -> str:
60
  resp = self._llm(self._prompt_template.format(pages = raw_data))
61
- return resp
 
 
 
 
 
 
 
 
 
 
 
52
  def _get_raw_text_from_pdf(self, path:str) -> str:
53
  text = ""
54
  pdf_reader = PdfReader(path)
55
+ for page in pdf_reader.pages:
56
  text += page.extract_text()
57
  return text
58
 
59
  def _extract_data_from_llm(self, raw_data:str) -> str:
60
  resp = self._llm(self._prompt_template.format(pages = raw_data))
61
+ return resp
62
+
63
+ def _parse_response(self, response: str) -> Dict[str, str]:
64
+ pattern = r'{(.+)}'
65
+ re_match = re.search(pattern, response, re.DOTALL)
66
+ if re_match:
67
+ extracted_text = re_match.group(1)
68
+ data = eval('{' + extracted_text + '}')
69
+ return data
70
+ else:
71
+ raise Exception("No match found.")