Commit · 596a523
1 Parent(s): 5a088f1
added granularity
Browse files
- app.py +15 -3
- src/RAG.py +22 -6
- src/pipelines.py +8 -2
app.py
CHANGED
@@ -83,8 +83,12 @@ def display_invoice(image_path):
|
|
83 |
output_pdf = "invoice_" + os.path.basename(image_path).split(".")[0] + ".pdf"
|
84 |
path_to_output_pdf = f"{output_folder}/{output_pdf}"
|
85 |
try:
|
|
|
86 |
result = pipeline.generate_invoice(
|
87 |
-
image_path=image_path,
|
|
|
|
|
|
|
88 |
)
|
89 |
if result is None:
|
90 |
st.write("Image is irrelevant, upload another one")
|
@@ -124,13 +128,21 @@ with col1:
|
|
124 |
|
125 |
with col2:
|
126 |
if st.session_state.get("image"):
|
127 |
-
if st.button("Generate invoice"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
with st.spinner("Generating..."):
|
129 |
st.session_state["invoice"] = display_invoice(st.session_state["image"])
|
130 |
if st.session_state["invoice"]:
|
131 |
st.session_state["status"] = "loaded"
|
132 |
else:
|
133 |
-
st.button("Generate invoice", disabled=True)
|
|
|
134 |
if st.session_state.get("generated_pdf"):
|
135 |
with open(st.session_state["generated_pdf"], "rb") as f:
|
136 |
file_data = f.read()
|
|
|
83 |
output_pdf = "invoice_" + os.path.basename(image_path).split(".")[0] + ".pdf"
|
84 |
path_to_output_pdf = f"{output_folder}/{output_pdf}"
|
85 |
try:
|
86 |
+
print(f"Generate granular invoice: {st.session_state['granular_invoice']}")
|
87 |
result = pipeline.generate_invoice(
|
88 |
+
image_path=image_path,
|
89 |
+
output_path=path_to_output_pdf,
|
90 |
+
car_parts=car_parts,
|
91 |
+
use_granular_invoice=st.session_state["granular_invoice"]
|
92 |
)
|
93 |
if result is None:
|
94 |
st.write("Image is irrelevant, upload another one")
|
|
|
128 |
|
129 |
with col2:
|
130 |
if st.session_state.get("image"):
|
131 |
+
if st.button("Generate regular invoice"):
|
132 |
+
st.session_state["granular_invoice"] = False
|
133 |
+
with st.spinner("Generating..."):
|
134 |
+
st.session_state["invoice"] = display_invoice(st.session_state["image"])
|
135 |
+
if st.session_state["invoice"]:
|
136 |
+
st.session_state["status"] = "loaded"
|
137 |
+
if st.button("Generate granular invoice"):
|
138 |
+
st.session_state["granular_invoice"] = True
|
139 |
with st.spinner("Generating..."):
|
140 |
st.session_state["invoice"] = display_invoice(st.session_state["image"])
|
141 |
if st.session_state["invoice"]:
|
142 |
st.session_state["status"] = "loaded"
|
143 |
else:
|
144 |
+
st.button("Generate regular invoice", disabled=True)
|
145 |
+
st.button("Generate granular invoice", disabled=True)
|
146 |
if st.session_state.get("generated_pdf"):
|
147 |
with open(st.session_state["generated_pdf"], "rb") as f:
|
148 |
file_data = f.read()
|
src/RAG.py
CHANGED
@@ -34,8 +34,17 @@ class RAG:
|
|
34 |
self.path_to_images = path_to_images
|
35 |
self.reranker = reranker
|
36 |
if path_to_invoice_json:
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
@staticmethod
|
41 |
def image_to_base64(image_path):
|
@@ -61,14 +70,18 @@ class RAG:
|
|
61 |
distances, indices = self.index.search(image_features, k)
|
62 |
return distances, indices
|
63 |
|
64 |
-
def return_invoice_table(self, path=None, invoice_is_table=True):
|
65 |
if path is None and not invoice_is_table:
|
66 |
raise ValueError("Path to invoice must be provided.")
|
67 |
if self.invoice_json is None and invoice_is_table:
|
68 |
raise ValueError("Path to invoice json must be provided.")
|
|
|
|
|
69 |
|
70 |
-
if invoice_is_table:
|
71 |
return self.invoice_json[path]
|
|
|
|
|
72 |
|
73 |
pdf_path = f"{self.path_to_invoices}/{path}"
|
74 |
reader = PdfReader(pdf_path)
|
@@ -98,7 +111,8 @@ class RAG:
|
|
98 |
return_only_path=True,
|
99 |
k=1,
|
100 |
damage_description=None,
|
101 |
-
invoice_is_table=True
|
|
|
102 |
):
|
103 |
if self.image_invoice_index is None:
|
104 |
raise ValueError("No index for invoices found.")
|
@@ -124,7 +138,9 @@ class RAG:
|
|
124 |
invoices_tables = []
|
125 |
|
126 |
for invoice in invoices:
|
127 |
-
reformatted_inv = self.return_invoice_table(
|
|
|
|
|
128 |
invoices_tables.append(reformatted_inv)
|
129 |
|
130 |
return invoices_tables, invoices
|
|
|
34 |
self.path_to_images = path_to_images
|
35 |
self.reranker = reranker
|
36 |
if path_to_invoice_json:
|
37 |
+
if type(path_to_invoice_json) == str:
|
38 |
+
with open(path_to_invoice_json, "r") as f:
|
39 |
+
self.invoice_json = json.load(f)
|
40 |
+
elif type(path_to_invoice_json) == dict and set(list(path_to_invoice_json.keys())) == {"invoices", "invoices_granular"}:
|
41 |
+
with open(path_to_invoice_json["invoices"], "r") as f:
|
42 |
+
self.invoice_json = json.load(f)
|
43 |
+
with open(path_to_invoice_json["invoices_granular"], "r") as f:
|
44 |
+
self.invoice_json_granular = json.load(f)
|
45 |
+
else:
|
46 |
+
raise ValueError("Invalid format for invoice json.")
|
47 |
+
|
48 |
|
49 |
@staticmethod
|
50 |
def image_to_base64(image_path):
|
|
|
70 |
distances, indices = self.index.search(image_features, k)
|
71 |
return distances, indices
|
72 |
|
73 |
+
def return_invoice_table(self, path=None, invoice_is_table=True, use_granular_invoice=False):
|
74 |
if path is None and not invoice_is_table:
|
75 |
raise ValueError("Path to invoice must be provided.")
|
76 |
if self.invoice_json is None and invoice_is_table:
|
77 |
raise ValueError("Path to invoice json must be provided.")
|
78 |
+
if self.invoice_json_granular is None and use_granular_invoice:
|
79 |
+
raise ValueError("Path to granular invoice json must be provided.")
|
80 |
|
81 |
+
if invoice_is_table and not use_granular_invoice:
|
82 |
return self.invoice_json[path]
|
83 |
+
elif invoice_is_table and use_granular_invoice:
|
84 |
+
return self.invoice_json_granular[path]
|
85 |
|
86 |
pdf_path = f"{self.path_to_invoices}/{path}"
|
87 |
reader = PdfReader(pdf_path)
|
|
|
111 |
return_only_path=True,
|
112 |
k=1,
|
113 |
damage_description=None,
|
114 |
+
invoice_is_table=True,
|
115 |
+
use_granular_invoice=False
|
116 |
):
|
117 |
if self.image_invoice_index is None:
|
118 |
raise ValueError("No index for invoices found.")
|
|
|
138 |
invoices_tables = []
|
139 |
|
140 |
for invoice in invoices:
|
141 |
+
reformatted_inv = self.return_invoice_table(
|
142 |
+
invoice, invoice_is_table, use_granular_invoice=use_granular_invoice
|
143 |
+
)
|
144 |
invoices_tables.append(reformatted_inv)
|
145 |
|
146 |
return invoices_tables, invoices
|
src/pipelines.py
CHANGED
@@ -106,7 +106,9 @@ class InvoiceGenerator:
|
|
106 |
all_lines[first_cost_line:last_cost_line] = list(map(lambda x: " | ".join(x), cost_lines))
|
107 |
return "\n".join(all_lines)
|
108 |
|
109 |
-
def generate_invoice(
|
|
|
|
|
110 |
|
111 |
result = {}
|
112 |
|
@@ -121,7 +123,11 @@ class InvoiceGenerator:
|
|
121 |
print("Damage Description:", damage_description)
|
122 |
|
123 |
invoice_info, invoice_path = self.rag.find_invoice(
|
124 |
-
image_path=image_path,
|
|
|
|
|
|
|
|
|
125 |
)
|
126 |
invoice_info = invoice_info[0]
|
127 |
invoice_path = invoice_path[0]
|
|
|
106 |
all_lines[first_cost_line:last_cost_line] = list(map(lambda x: " | ".join(x), cost_lines))
|
107 |
return "\n".join(all_lines)
|
108 |
|
109 |
+
def generate_invoice(
|
110 |
+
self, image_path, output_path=None, car_parts=None, use_granular_invoice=False
|
111 |
+
):
|
112 |
|
113 |
result = {}
|
114 |
|
|
|
123 |
print("Damage Description:", damage_description)
|
124 |
|
125 |
invoice_info, invoice_path = self.rag.find_invoice(
|
126 |
+
image_path=image_path,
|
127 |
+
return_only_path=False,
|
128 |
+
damage_description=damage_description,
|
129 |
+
k=5,
|
130 |
+
use_granular_invoice=use_granular_invoice
|
131 |
)
|
132 |
invoice_info = invoice_info[0]
|
133 |
invoice_path = invoice_path[0]
|