alexandraroze committed on
Commit
596a523
·
1 Parent(s): 5a088f1

added granularity

Browse files
Files changed (3) hide show
  1. app.py +15 -3
  2. src/RAG.py +22 -6
  3. src/pipelines.py +8 -2
app.py CHANGED
@@ -83,8 +83,12 @@ def display_invoice(image_path):
83
  output_pdf = "invoice_" + os.path.basename(image_path).split(".")[0] + ".pdf"
84
  path_to_output_pdf = f"{output_folder}/{output_pdf}"
85
  try:
 
86
  result = pipeline.generate_invoice(
87
- image_path=image_path, output_path=path_to_output_pdf, car_parts=car_parts
 
 
 
88
  )
89
  if result is None:
90
  st.write("Image is irrelevant, upload another one")
@@ -124,13 +128,21 @@ with col1:
124
 
125
  with col2:
126
  if st.session_state.get("image"):
127
- if st.button("Generate invoice"):
 
 
 
 
 
 
 
128
  with st.spinner("Generating..."):
129
  st.session_state["invoice"] = display_invoice(st.session_state["image"])
130
  if st.session_state["invoice"]:
131
  st.session_state["status"] = "loaded"
132
  else:
133
- st.button("Generate invoice", disabled=True)
 
134
  if st.session_state.get("generated_pdf"):
135
  with open(st.session_state["generated_pdf"], "rb") as f:
136
  file_data = f.read()
 
83
  output_pdf = "invoice_" + os.path.basename(image_path).split(".")[0] + ".pdf"
84
  path_to_output_pdf = f"{output_folder}/{output_pdf}"
85
  try:
86
+ print(f"Generate granular invoice: {st.session_state['granular_invoice']}")
87
  result = pipeline.generate_invoice(
88
+ image_path=image_path,
89
+ output_path=path_to_output_pdf,
90
+ car_parts=car_parts,
91
+ use_granular_invoice=st.session_state["granular_invoice"]
92
  )
93
  if result is None:
94
  st.write("Image is irrelevant, upload another one")
 
128
 
129
  with col2:
130
  if st.session_state.get("image"):
131
+ if st.button("Generate regular invoice"):
132
+ st.session_state["granular_invoice"] = False
133
+ with st.spinner("Generating..."):
134
+ st.session_state["invoice"] = display_invoice(st.session_state["image"])
135
+ if st.session_state["invoice"]:
136
+ st.session_state["status"] = "loaded"
137
+ if st.button("Generate granular invoice"):
138
+ st.session_state["granular_invoice"] = True
139
  with st.spinner("Generating..."):
140
  st.session_state["invoice"] = display_invoice(st.session_state["image"])
141
  if st.session_state["invoice"]:
142
  st.session_state["status"] = "loaded"
143
  else:
144
+ st.button("Generate regular invoice", disabled=True)
145
+ st.button("Generate granular invoice", disabled=True)
146
  if st.session_state.get("generated_pdf"):
147
  with open(st.session_state["generated_pdf"], "rb") as f:
148
  file_data = f.read()
src/RAG.py CHANGED
@@ -34,8 +34,17 @@ class RAG:
34
  self.path_to_images = path_to_images
35
  self.reranker = reranker
36
  if path_to_invoice_json:
37
- with open(path_to_invoice_json, "r") as f:
38
- self.invoice_json = json.load(f)
 
 
 
 
 
 
 
 
 
39
 
40
  @staticmethod
41
  def image_to_base64(image_path):
@@ -61,14 +70,18 @@ class RAG:
61
  distances, indices = self.index.search(image_features, k)
62
  return distances, indices
63
 
64
- def return_invoice_table(self, path=None, invoice_is_table=True):
65
  if path is None and not invoice_is_table:
66
  raise ValueError("Path to invoice must be provided.")
67
  if self.invoice_json is None and invoice_is_table:
68
  raise ValueError("Path to invoice json must be provided.")
 
 
69
 
70
- if invoice_is_table:
71
  return self.invoice_json[path]
 
 
72
 
73
  pdf_path = f"{self.path_to_invoices}/{path}"
74
  reader = PdfReader(pdf_path)
@@ -98,7 +111,8 @@ class RAG:
98
  return_only_path=True,
99
  k=1,
100
  damage_description=None,
101
- invoice_is_table=True
 
102
  ):
103
  if self.image_invoice_index is None:
104
  raise ValueError("No index for invoices found.")
@@ -124,7 +138,9 @@ class RAG:
124
  invoices_tables = []
125
 
126
  for invoice in invoices:
127
- reformatted_inv = self.return_invoice_table(invoice, invoice_is_table)
 
 
128
  invoices_tables.append(reformatted_inv)
129
 
130
  return invoices_tables, invoices
 
34
  self.path_to_images = path_to_images
35
  self.reranker = reranker
36
  if path_to_invoice_json:
37
+ if type(path_to_invoice_json) == str:
38
+ with open(path_to_invoice_json, "r") as f:
39
+ self.invoice_json = json.load(f)
40
+ elif type(path_to_invoice_json) == dict and set(list(path_to_invoice_json.keys())) == {"invoices", "invoices_granular"}:
41
+ with open(path_to_invoice_json["invoices"], "r") as f:
42
+ self.invoice_json = json.load(f)
43
+ with open(path_to_invoice_json["invoices_granular"], "r") as f:
44
+ self.invoice_json_granular = json.load(f)
45
+ else:
46
+ raise ValueError("Invalid format for invoice json.")
47
+
48
 
49
  @staticmethod
50
  def image_to_base64(image_path):
 
70
  distances, indices = self.index.search(image_features, k)
71
  return distances, indices
72
 
73
+ def return_invoice_table(self, path=None, invoice_is_table=True, use_granular_invoice=False):
74
  if path is None and not invoice_is_table:
75
  raise ValueError("Path to invoice must be provided.")
76
  if self.invoice_json is None and invoice_is_table:
77
  raise ValueError("Path to invoice json must be provided.")
78
+ if self.invoice_json_granular is None and use_granular_invoice:
79
+ raise ValueError("Path to granular invoice json must be provided.")
80
 
81
+ if invoice_is_table and not use_granular_invoice:
82
  return self.invoice_json[path]
83
+ elif invoice_is_table and use_granular_invoice:
84
+ return self.invoice_json_granular[path]
85
 
86
  pdf_path = f"{self.path_to_invoices}/{path}"
87
  reader = PdfReader(pdf_path)
 
111
  return_only_path=True,
112
  k=1,
113
  damage_description=None,
114
+ invoice_is_table=True,
115
+ use_granular_invoice=False
116
  ):
117
  if self.image_invoice_index is None:
118
  raise ValueError("No index for invoices found.")
 
138
  invoices_tables = []
139
 
140
  for invoice in invoices:
141
+ reformatted_inv = self.return_invoice_table(
142
+ invoice, invoice_is_table, use_granular_invoice=use_granular_invoice
143
+ )
144
  invoices_tables.append(reformatted_inv)
145
 
146
  return invoices_tables, invoices
src/pipelines.py CHANGED
@@ -106,7 +106,9 @@ class InvoiceGenerator:
106
  all_lines[first_cost_line:last_cost_line] = list(map(lambda x: " | ".join(x), cost_lines))
107
  return "\n".join(all_lines)
108
 
109
- def generate_invoice(self, image_path, output_path=None, car_parts=None):
 
 
110
 
111
  result = {}
112
 
@@ -121,7 +123,11 @@ class InvoiceGenerator:
121
  print("Damage Description:", damage_description)
122
 
123
  invoice_info, invoice_path = self.rag.find_invoice(
124
- image_path=image_path, return_only_path=False, damage_description=damage_description, k=5
 
 
 
 
125
  )
126
  invoice_info = invoice_info[0]
127
  invoice_path = invoice_path[0]
 
106
  all_lines[first_cost_line:last_cost_line] = list(map(lambda x: " | ".join(x), cost_lines))
107
  return "\n".join(all_lines)
108
 
109
+ def generate_invoice(
110
+ self, image_path, output_path=None, car_parts=None, use_granular_invoice=False
111
+ ):
112
 
113
  result = {}
114
 
 
123
  print("Damage Description:", damage_description)
124
 
125
  invoice_info, invoice_path = self.rag.find_invoice(
126
+ image_path=image_path,
127
+ return_only_path=False,
128
+ damage_description=damage_description,
129
+ k=5,
130
+ use_granular_invoice=use_granular_invoice
131
  )
132
  invoice_info = invoice_info[0]
133
  invoice_path = invoice_path[0]