Commit
·
3d02141
1
Parent(s):
db902e6
updated app
Browse files- app.py +29 -9
- src/pipelines.py +7 -7
app.py
CHANGED
@@ -1,4 +1,24 @@
|
|
|
|
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from PIL import Image
|
3 |
import os
|
4 |
import torch
|
@@ -11,7 +31,7 @@ from src.pipelines import InvoiceGenerator
|
|
11 |
st.set_page_config(page_title="Invoice generator", layout="wide")
|
12 |
output_folder = "output"
|
13 |
data_folder = "data"
|
14 |
-
template = "template.
|
15 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
16 |
with open(f"{data_folder}/car_parts.json", "r") as f:
|
17 |
car_parts = json.load(f)
|
@@ -23,6 +43,7 @@ if "pipeline" not in st.session_state:
|
|
23 |
image_invoice_index_path=f"{data_folder}/image_invoice.csv",
|
24 |
path_to_invoices=f"{data_folder}/invoices",
|
25 |
path_to_images=f"{data_folder}/images",
|
|
|
26 |
reranker_model="monovlm",
|
27 |
device=device,
|
28 |
gpu_memory_utilization=0.65
|
@@ -44,24 +65,24 @@ def read_markdown_file(file_path):
|
|
44 |
|
45 |
|
46 |
def display_invoice(image_path):
|
47 |
-
|
48 |
-
|
49 |
try:
|
50 |
result = pipeline.generate_invoice(
|
51 |
-
image_path=image_path, output_path=
|
52 |
)
|
53 |
if result is None:
|
54 |
st.write("Image is irrelevant, upload another one")
|
55 |
st.session_state["status"] = "irrelevant"
|
56 |
return
|
57 |
-
print(f"Generated invoice: {
|
58 |
-
st.session_state["
|
59 |
st.session_state["invoice_info"] = result["invoice_info"]
|
60 |
st.session_state["invoice_path"] = f"{data_folder}/invoices/{result['invoice_path']}"
|
61 |
st.session_state["similar_image"] = f"{data_folder}/images/{result['similar_image']}"
|
62 |
st.session_state["damage_description"] = result["damage_description"]
|
63 |
st.session_state["detailed_damage_description"] = result["detailed_damage_description"]
|
64 |
-
return
|
65 |
except Exception as e:
|
66 |
st.write("Could not generate invoice, please try again")
|
67 |
print(e)
|
@@ -106,8 +127,7 @@ with col2:
|
|
106 |
|
107 |
with col3:
|
108 |
if st.session_state.get("status") == "loaded":
|
109 |
-
|
110 |
-
st.markdown(st.session_state["invoice"])
|
111 |
st.image(st.session_state["similar_image"], caption="Similar accident", width=300)
|
112 |
st.write(f"Detailed damage description: {st.session_state['detailed_damage_description']}")
|
113 |
st.write(f"Damage description: {st.session_state['damage_description']}")
|
|
|
1 |
+
import subprocess
|
2 |
+
import sys
|
3 |
import streamlit as st
|
4 |
+
|
5 |
+
def install_package(package, flags=False, import_name=None):
    """Install *package* with pip unless its module can already be imported.

    Args:
        package: A pip requirement string, e.g. ``"flash-attn"`` or
            ``"rerankers[all]"``. It is passed to ``pip install`` unchanged.
        flags: When true, pip is invoked with ``--no-build-isolation``.
        import_name: Module name used for the "already installed" probe.
            When omitted it is derived from *package* by dropping extras and
            version specifiers and replacing ``-`` with ``_``.

    Raises:
        subprocess.CalledProcessError: If the pip invocation fails.
    """
    import importlib
    import re

    if import_name is None:
        # A requirement string is not an importable name: "rerankers[all]"
        # carries extras and "flash-attn" is imported as "flash_attn".
        # Probing with the raw string would fail every time and force a
        # reinstall on each call.
        distribution = re.split(r"[\[<>=!~;\s]", package, maxsplit=1)[0]
        import_name = distribution.replace("-", "_")

    try:
        importlib.import_module(import_name)
    except ImportError:
        print(f"Installing '{package}'...")
        command = [sys.executable, "-m", "pip", "install"]
        if flags:
            command.append("--no-build-isolation")
        command.append(package)
        subprocess.check_call(command)
        # Let the running interpreter see the freshly installed package.
        importlib.invalidate_caches()
    else:
        print(f"'{package}' is already installed.")
|
15 |
+
|
16 |
+
# Install the heavy optional dependencies once per Streamlit session; the
# session-state flag stops later reruns of the script from repeating the work.
if not st.session_state.get("installed_libraries"):
    install_package("flash-attn", flags=True)
    install_package("rerankers[all]")
    st.session_state["installed_libraries"] = True
|
20 |
+
|
21 |
+
|
22 |
from PIL import Image
|
23 |
import os
|
24 |
import torch
|
|
|
31 |
st.set_page_config(page_title="Invoice generator", layout="wide")
|
32 |
output_folder = "output"
|
33 |
data_folder = "data"
|
34 |
+
template = "template.md"
|
35 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
36 |
with open(f"{data_folder}/car_parts.json", "r") as f:
|
37 |
car_parts = json.load(f)
|
|
|
43 |
image_invoice_index_path=f"{data_folder}/image_invoice.csv",
|
44 |
path_to_invoices=f"{data_folder}/invoices",
|
45 |
path_to_images=f"{data_folder}/images",
|
46 |
+
path_to_template=f"{data_folder}/{template}",
|
47 |
reranker_model="monovlm",
|
48 |
device=device,
|
49 |
gpu_memory_utilization=0.65
|
|
|
65 |
|
66 |
|
67 |
def display_invoice(image_path):
|
68 |
+
output_pdf = "invoice_" + os.path.basename(image_path).split(".")[0] + ".pdf"
|
69 |
+
path_to_output_pdf = f"{output_folder}/{output_pdf}"
|
70 |
try:
|
71 |
result = pipeline.generate_invoice(
|
72 |
+
image_path=image_path, output_path=path_to_output_pdf, car_parts=car_parts
|
73 |
)
|
74 |
if result is None:
|
75 |
st.write("Image is irrelevant, upload another one")
|
76 |
st.session_state["status"] = "irrelevant"
|
77 |
return
|
78 |
+
print(f"Generated invoice: {path_to_output_pdf}")
|
79 |
+
st.session_state["generated_pdf"] = path_to_output_pdf
|
80 |
st.session_state["invoice_info"] = result["invoice_info"]
|
81 |
st.session_state["invoice_path"] = f"{data_folder}/invoices/{result['invoice_path']}"
|
82 |
st.session_state["similar_image"] = f"{data_folder}/images/{result['similar_image']}"
|
83 |
st.session_state["damage_description"] = result["damage_description"]
|
84 |
st.session_state["detailed_damage_description"] = result["detailed_damage_description"]
|
85 |
+
return get_image_from_pdf(path_to_output_pdf)
|
86 |
except Exception as e:
|
87 |
st.write("Could not generate invoice, please try again")
|
88 |
print(e)
|
|
|
127 |
|
128 |
with col3:
|
129 |
if st.session_state.get("status") == "loaded":
|
130 |
+
st.image(st.session_state["invoice"], caption="Generated invoice", use_container_width=True)
|
|
|
131 |
st.image(st.session_state["similar_image"], caption="Similar accident", width=300)
|
132 |
st.write(f"Detailed damage description: {st.session_state['detailed_damage_description']}")
|
133 |
st.write(f"Damage description: {st.session_state['damage_description']}")
|
src/pipelines.py
CHANGED
@@ -4,6 +4,7 @@ from src.prompts import GENERATE_INVOICE_PROMPT, GENERATE_BRIEF_DAMAGE_DESCRIPTI
|
|
4 |
GENERATE_DETAILED_DAMAGE_DESCRIPTION_PROMPT
|
5 |
from rerankers import Reranker
|
6 |
import re
|
|
|
7 |
from fuzzywuzzy import fuzz
|
8 |
|
9 |
|
@@ -14,6 +15,7 @@ class InvoiceGenerator:
|
|
14 |
image_invoice_index_path,
|
15 |
path_to_invoices,
|
16 |
path_to_images,
|
|
|
17 |
reranker_model=None,
|
18 |
device="cuda",
|
19 |
max_model_len=4096, max_tokens=2048, gpu_memory_utilization=0.95
|
@@ -33,15 +35,13 @@ class InvoiceGenerator:
|
|
33 |
)
|
34 |
self.path_to_invoices = path_to_invoices
|
35 |
self.path_to_images = path_to_images
|
|
|
36 |
|
37 |
-
def format_invoice(self, generated_invoice, output_path
|
38 |
-
with open(
|
39 |
md_text = f.read()
|
40 |
md_text = md_text.replace(r"<<table>>", generated_invoice)
|
41 |
-
|
42 |
-
f.write(md_text)
|
43 |
-
# md2pdf(output_path, md_content=md_text)
|
44 |
-
return md_text
|
45 |
|
46 |
@staticmethod
|
47 |
def check_within_range(generated_invoice, car_parts):
|
@@ -145,6 +145,6 @@ class InvoiceGenerator:
|
|
145 |
print(comparing_results)
|
146 |
|
147 |
if output_path:
|
148 |
-
self.format_invoice(generated_invoice=generated_invoice, output_path=output_path
|
149 |
|
150 |
return result
|
|
|
4 |
GENERATE_DETAILED_DAMAGE_DESCRIPTION_PROMPT
|
5 |
from rerankers import Reranker
|
6 |
import re
|
7 |
+
from md2pdf.core import md2pdf
|
8 |
from fuzzywuzzy import fuzz
|
9 |
|
10 |
|
|
|
15 |
image_invoice_index_path,
|
16 |
path_to_invoices,
|
17 |
path_to_images,
|
18 |
+
path_to_template,
|
19 |
reranker_model=None,
|
20 |
device="cuda",
|
21 |
max_model_len=4096, max_tokens=2048, gpu_memory_utilization=0.95
|
|
|
35 |
)
|
36 |
self.path_to_invoices = path_to_invoices
|
37 |
self.path_to_images = path_to_images
|
38 |
+
self.path_to_template = path_to_template
|
39 |
|
40 |
+
def format_invoice(self, generated_invoice, output_path):
    """Fill the Markdown template with the invoice table and write a PDF.

    Reads the template at ``self.path_to_template``, replaces the
    ``<<table>>`` placeholder with *generated_invoice*, and passes the
    resulting Markdown to ``md2pdf`` to produce *output_path*.

    Args:
        generated_invoice: Markdown text substituted for ``<<table>>``.
        output_path: Destination path of the PDF file.
    """
    import os

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(self.path_to_template, "r", encoding="utf-8") as f:
        md_text = f.read()
    md_text = md_text.replace(r"<<table>>", generated_invoice)

    # Create the destination directory up front so the PDF write does not
    # fail on a fresh checkout where it is missing.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    md2pdf(output_path, md_content=md_text)
|
|
|
|
|
|
|
45 |
|
46 |
@staticmethod
|
47 |
def check_within_range(generated_invoice, car_parts):
|
|
|
145 |
print(comparing_results)
|
146 |
|
147 |
if output_path:
|
148 |
+
self.format_invoice(generated_invoice=generated_invoice, output_path=output_path)
|
149 |
|
150 |
return result
|