Update backend.py
Browse files- backend.py +12 -2
backend.py
CHANGED
@@ -52,10 +52,20 @@ class InvoicePipeline:
|
|
52 |
# Pre-change ("-") side of the diff for this method, kept verbatim as a record.
# It opens the PDF at `path` with PdfReader and appends extract_text() for each
# item obtained by iterating the reader object itself, then returns the
# accumulated string. The post-change side of this diff iterates the reader's
# `.pages` instead.
# NOTE(review): presumably the reader object is not directly iterable, which
# would make this loop fail at runtime -- confirm against the PDF library docs.
def _get_raw_text_from_pdf(self, path:str) -> str:
|
53 |
text = ""
|
54 |
pdf_reader = PdfReader(path)
|
55 |
-
for page in pdf_reader:
|
56 |
text += page.extract_text()
|
57 |
return text
|
58 |
|
59 |
# Pre-change ("-") side of the diff for this method, kept verbatim as a record.
# It fills the `pages` field of self._prompt_template with `raw_data`, passes
# the resulting prompt to self._llm, and returns that call's result unchanged.
# NOTE(review): the "-" mark sits on the return line, whose text is the same on
# the post-change side; presumably a whitespace or end-of-file change --
# confirm against the raw diff.
def _extract_data_from_llm(self, raw_data:str) -> str:
|
60 |
resp = self._llm(self._prompt_template.format(pages = raw_data))
|
61 |
-
return resp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
def _get_raw_text_from_pdf(self, path:str) -> str:
    """Return the text of every page in the PDF at ``path``, concatenated.

    The file is opened with ``PdfReader`` and each entry of its ``pages``
    collection contributes the result of ``extract_text()``, in page order
    and with no separator inserted between pages.
    """
    reader = PdfReader(path)
    return "".join(page.extract_text() for page in reader.pages)
|
58 |
|
59 |
def _extract_data_from_llm(self, raw_data:str) -> str:
|
60 |
resp = self._llm(self._prompt_template.format(pages = raw_data))
|
61 |
+
return resp
|
62 |
+
|
63 |
+
def _parse_response(self, response: str) -> Dict[str, str]:
|
64 |
+
pattern = r'{(.+)}'
|
65 |
+
re_match = re.search(pattern, response, re.DOTALL)
|
66 |
+
if re_match:
|
67 |
+
extracted_text = re_match.group(1)
|
68 |
+
data = eval('{' + extracted_text + '}')
|
69 |
+
return data
|
70 |
+
else:
|
71 |
+
raise Exception("No match found.")
|