Spaces:

Innosphere-AI-organization
/

pixtral-demo

Paused

App Files Files Community

alexandraroze commited on Nov 27, 2024

Commit

f11a85d

1 Parent(s): 92a2adc

dockerfile

Browse files

Files changed (8) hide show

Dockerfile +25 -0
app.py +5 -0
poetry.lock +0 -0
pyproject.toml +29 -0
src/RAG.py +173 -0
src/model.py +55 -0
src/pipelines.py +149 -0
src/prompts.py +72 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

FROM python:3.10

# Create the non-root user (uid 1000) that the container runs as.
RUN useradd -m -u 1000 user

# Set the working directory and hand it to the runtime user so the app can
# write its outputs there.
WORKDIR /app
RUN chown user:user /app

# Copy only the Poetry manifests first so the dependency layer stays cached
# until pyproject.toml / poetry.lock change.
COPY --chown=user ./pyproject.toml ./poetry.lock* ./

# Install dependencies while still root: with virtualenvs.create=false Poetry
# installs into the system interpreter, which the unprivileged user cannot
# write to.
#   --only main : replacement for the deprecated --no-dev flag
#   --no-root   : the project sources are not copied yet, install deps only
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir poetry \
    && poetry config virtualenvs.create false \
    && poetry install --only main --no-root --no-interaction --no-ansi

# Drop privileges for everything that follows.
USER user
ENV PATH="/home/user/.local/bin:$PATH"

# Copy the rest of the app
COPY --chown=user . /app

# Expose the Streamlit default port
EXPOSE 8501

# Run the Streamlit app
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]

app.py ADDED Viewed

	@@ -0,0 +1,5 @@

# streamlit example
# Placeholder entry point: it only renders a page title and does not use
# anything from the src/ package yet.
import streamlit as st
st.title('Hello World!')

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,29 @@

[tool.poetry]
name = "demo-pixtral-qwen"
version = "0.1.0"
description = ""
authors = ["Your Name <[email protected]>"]
[tool.poetry.dependencies]
python = "^3.10"
# CLIP is installed straight from the upstream Git repository, tracking `main`.
clip = {git = "https://github.com/openai/CLIP.git", rev = "main"}
torchvision = "0.19"
vllm = "^0.6.3"
pillow = "10.3.0"
PyMuPDF = "^1.24.13"
pandas = "^2.2.3"
faiss-gpu = "^1.7.2"
# NOTE(review): `huggingface` is a different distribution from
# `huggingface-hub` below and nothing in the shown sources imports it --
# confirm it is needed.
huggingface = "^0.0.1"
huggingface-hub = "^0.26.2"
md2pdf = "^1.0.1"
pypdf = "^5.1.0"
wheel = "^0.45.1"
rerankers = {extras = ["all"], version = "^0.6.0"}
streamlit = "^1.40.1"
fuzzywuzzy = "^0.18.0"
# NOTE(review): src/RAG.py imports torch, numpy and tqdm, none of which is
# declared here directly; presumably they arrive transitively -- verify.
# No development-only dependencies are declared.
[tool.poetry.dev-dependencies]
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

src/RAG.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import clip
+import faiss
+from PIL import Image
+from pypdf import PdfReader
+import pandas as pd
+import re
+import os
+import fitz
+import torch
+import numpy as np
+from tqdm import tqdm
+import base64
class RAG:
    """Retrieve reference invoices for a damage photo via CLIP and FAISS.

    A query (text or image) is embedded with CLIP, L2-normalised and looked
    up in a pre-built FAISS index.  The row positions returned by FAISS are
    resolved through a CSV mapping (columns ``invoice`` and ``image``) to
    invoice PDFs; optionally the cost table of each PDF is extracted as
    pipe-separated text.
    """

    # Header line emitted for every reformatted invoice table.
    _TABLE_HEADER = (
        "Beschädigtes Teil | Teilkosten (EUR) | Arbeitsstunden | "
        "Arbeitskosten (EUR/Stunde) | Gesamtkosten (EUR)"
    )
    # Spans from the table header up to the currency unit of the grand total.
    _TABLE_PATTERN = r"Beschädigtes Teil.*?Gesamtsumme:.*?EUR"

    def __init__(
            self,
            fais_index_path,
            clip_model="ViT-B/32",
            reranker=None,
            device="cpu",
            image_invoice_index_path=None,
            path_to_invoices=None,
            path_to_images=None
    ):
        """Load the FAISS index, the CLIP model and the optional CSV mapping.

        Args:
            fais_index_path: Path of the FAISS index file to read.
            clip_model: CLIP model name passed to ``clip.load``.
            reranker: Optional object with a ``rank(query, docs, doc_ids=...)``
                method used to reorder candidates.
            device: Device the CLIP model is loaded on.
            image_invoice_index_path: Optional CSV mapping index rows to
                invoice and image file names.
            path_to_invoices: Directory containing the invoice PDFs.
            path_to_images: Directory containing the indexed images.
        """
        self.index = faiss.read_index(fais_index_path)
        self.model, self.preprocess = clip.load(clip_model, device=device)
        self.device = device
        # Always define the attribute so find_invoice can report a missing
        # mapping with its own error instead of an AttributeError.
        self.image_invoice_index = (
            pd.read_csv(image_invoice_index_path) if image_invoice_index_path else None
        )
        self.path_to_invoices = path_to_invoices
        self.path_to_images = path_to_images
        self.reranker = reranker

    @staticmethod
    def image_to_base64(image_path):
        """Return the base64-encoded bytes of the file at ``image_path``."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read())

    def _search(self, features, k):
        """Normalise ``features`` and return ``(distances, indices)`` for the top ``k``."""
        features = features / features.norm(dim=-1, keepdim=True)
        # Move to host memory first (required for CUDA tensors) and hand
        # FAISS the float32 array it expects.
        query = features.detach().cpu().numpy().astype("float32")
        return self.index.search(query, k)

    def search_text(self, text, k=1):
        """Return ``(distances, indices)`` of the ``k`` nearest entries to ``text``."""
        tokens = clip.tokenize([text]).to(self.device)
        return self._search(self.model.encode_text(tokens), k)

    def search_image(self, image=None, image_path=None, k=1):
        """Return ``(distances, indices)`` of the ``k`` nearest entries to an image.

        Args:
            image: An already opened image; takes precedence over ``image_path``.
            image_path: Path of an image file, used when ``image`` is None.
            k: Number of neighbours to return.

        Raises:
            ValueError: If neither ``image`` nor ``image_path`` is given.
        """
        if image is None and image_path is None:
            raise ValueError("Either image or image_path must be provided.")
        if image is None:
            # Close the file handle as soon as the tensor has been produced.
            with Image.open(image_path) as opened:
                image_input = self.preprocess(opened).unsqueeze(0).to(self.device)
        else:
            image_input = self.preprocess(image).unsqueeze(0).to(self.device)
        return self._search(self.model.encode_image(image_input), k)

    @classmethod
    def _reformat_invoice_table(cls, text, source="invoice"):
        """Extract the cost table from invoice text as pipe-separated rows.

        Args:
            text: Text extracted from the first invoice page.
            source: Name used in the error message.

        Returns:
            The fixed header line, one ``part | cost | hours | rate | total``
            line per item, and the unmodified grand-total line.

        Raises:
            ValueError: If ``text`` contains no cost table.
        """
        match = re.search(cls._TABLE_PATTERN, text or "", re.DOTALL)
        if match is None:
            raise ValueError(f"No cost table found in {source}.")
        header, *body = match.group().splitlines()
        # A part name may wrap onto the next line; only line breaks that are
        # not preceded by a digit are folded back into a space.
        cleaned_text = re.sub(r"(?<!\d)\n", " ", "\n".join(body))
        rows = (header + "\n" + cleaned_text).split("\n")
        formatted = []
        for row in rows[1:-1]:
            cells = row.split(" ")
            # The last four tokens are the numeric columns; everything before
            # them is the part name.
            formatted.append(" ".join(cells[:-4]) + " | " + " | ".join(cells[-4:]))
        return cls._TABLE_HEADER + "\n" + "\n".join(formatted) + "\n" + rows[-1]

    def find_invoice(self, image=None, image_path=None, return_only_path=True, k=1, damage_description=None):
        """Find the invoices whose indexed images are most similar to a photo.

        Candidates are returned in similarity order, or in reranker order
        when a reranker is configured.

        Args:
            image: An already opened image; takes precedence over ``image_path``.
            image_path: Path of the query image.
            return_only_path: When true, return file names only.
            k: Number of candidates to retrieve.
            damage_description: Query text for the reranker; required when a
                reranker is configured.

        Returns:
            ``(invoices, images_paths)`` when ``return_only_path`` is true,
            otherwise ``(invoices_tables, invoices)`` where each table is the
            reformatted cost table of the corresponding invoice PDF.

        Raises:
            ValueError: If no CSV mapping is loaded, the reranker is used
                without ``damage_description``, ``path_to_invoices`` is
                missing, or an invoice has no cost table.
        """
        if getattr(self, "image_invoice_index", None) is None:
            raise ValueError("No index for invoices found.")
        _, indices = self.search_image(image=image, image_path=image_path, k=k)
        # FAISS pads the result with -1 when it holds fewer than k vectors;
        # those must not be interpreted as "last row".
        hits = [int(i) for i in indices[0] if i >= 0]
        # Positional lookup keeps the similarity ranking returned by FAISS.
        rows = self.image_invoice_index.iloc[hits]
        invoices = rows['invoice'].values.tolist()
        images_paths = rows['image'].values.tolist()
        if self.reranker:
            if damage_description is None:
                raise ValueError("Damage description must be provided.")
            images = [f"{self.path_to_images}/{img_path}" for img_path in images_paths]
            results = self.reranker.rank(damage_description, images, doc_ids=invoices)
            image_by_invoice = dict(zip(invoices, images_paths))
            invoices = [doc.doc_id for doc in results]
            # Keep the image list aligned with the reranked invoices.
            images_paths = [image_by_invoice[invoice] for invoice in invoices]
        if return_only_path:
            return invoices, images_paths
        if not self.path_to_invoices:
            raise ValueError("Path to data must be provided.")
        invoices_tables = []
        for invoice in invoices:
            reader = PdfReader(f"{self.path_to_invoices}/{invoice}")
            text = reader.pages[0].extract_text()
            invoices_tables.append(self._reformat_invoice_table(text, source=invoice))
        return invoices_tables, invoices
def build_rag(directory, clip_model="ViT-B/32"):
    """Build the image/invoice mapping and the FAISS index for ``directory``.

    For every PDF in ``{directory}/invoices_validated`` the first image and
    the text of the second page are extracted, the image is written to
    ``{directory}/images`` and a copy of the PDF reduced to its first page
    is written to ``{directory}/invoices``.  All images are then embedded
    with CLIP, the mapping is saved as ``{directory}/image_invoice.csv`` and
    the normalised embeddings as ``{directory}/invoice_index.faiss``.

    PDFs without a second page, or without an image on it, are skipped with
    a warning.

    Args:
        directory: Root directory containing ``invoices_validated``.
        clip_model: CLIP model name passed to ``clip.load``.

    Raises:
        ValueError: If no usable invoice PDF is found.
    """
    import warnings

    source_dir = f"{directory}/invoices_validated"
    # Sorted so that img_id assignment does not depend on directory order.
    invoices = sorted(name for name in os.listdir(source_dir) if name.endswith(".pdf"))
    os.makedirs(f"{directory}/images", exist_ok=True)
    os.makedirs(f"{directory}/invoices", exist_ok=True)

    records = []
    for invoice in invoices:
        # The context manager closes the document even if extraction fails.
        with fitz.open(f"{source_dir}/{invoice}") as doc:
            if doc.page_count < 2:
                warnings.warn(f"Skipping {invoice}: no second page with a damage photo.")
                continue
            page = doc[1]
            image_list = page.get_images(full=True)
            if not image_list:
                warnings.warn(f"Skipping {invoice}: no image found on the second page.")
                continue
            text = page.get_text()
            image_bytes = doc.extract_image(image_list[0][0])["image"]
            # The raw image bytes are stored under a .png name whatever their
            # real format; the name is derived from the invoice file name.
            image_name = invoice.replace(".pdf", ".png")
            with open(f"{directory}/images/{image_name}", "wb") as img_file:
                img_file.write(image_bytes)
            # Keep only the first page in the stored invoice copy.
            doc.delete_pages(range(1, doc.page_count))
            doc.save(f"{directory}/invoices/{invoice}")
        records.append({
            "invoice": invoice,
            "image": image_name,
            "description": text
        })
    if not records:
        raise ValueError(f"No usable invoice PDFs found in {source_dir}.")

    image_invoice = pd.DataFrame(records)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load(clip_model, device=device)
    embeddings = []
    for image_name in tqdm(image_invoice["image"].tolist()):
        with Image.open(f"{directory}/images/{image_name}") as image:
            inputs = preprocess(image).unsqueeze(0).to(device)
        with torch.no_grad():
            image_embedding = model.encode_image(inputs)
        image_embedding = image_embedding / image_embedding.norm(dim=-1, keepdim=True)
        embeddings.append(image_embedding.cpu().numpy().astype("float32"))
    # img_id equals the row position, which is also the vector position in
    # the FAISS index built below.
    image_invoice["img_id"] = list(range(len(image_invoice)))
    image_invoice.to_csv(f"{directory}/image_invoice.csv", index=False)
    embeddings_np = np.vstack(embeddings)
    # Inner product over unit-length vectors.
    index = faiss.IndexFlatIP(embeddings_np.shape[1])
    index.add(embeddings_np)
    faiss.write_index(index, f"{directory}/invoice_index.faiss")

src/model.py ADDED Viewed

	@@ -0,0 +1,55 @@

+from vllm import LLM
+from vllm.sampling_params import SamplingParams
+import base64
def encode_image(image_path: str):
    """Return the contents of the file at ``image_path`` as a base64 string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
class Pixtral:
    """Thin wrapper around a vLLM-hosted Pixtral model for single-turn chats."""

    def __init__(self, max_model_len=4096, max_tokens=4096, gpu_memory_utilization=0.95, temperature=0.35,
                 model_name="mistralai/Pixtral-12B-2409"):
        """Load the model and fix the sampling parameters.

        Args:
            max_model_len: Context length passed to ``LLM``.
            max_tokens: Maximum number of generated tokens per answer.
            gpu_memory_utilization: GPU memory fraction passed to ``LLM``.
            temperature: Sampling temperature.
            model_name: Model identifier passed to ``LLM``.
        """
        self.model_name = model_name
        self.sampling_params = SamplingParams(max_tokens=max_tokens, temperature=temperature)
        self.llm = LLM(
            model=self.model_name,
            tokenizer_mode="mistral",
            gpu_memory_utilization=gpu_memory_utilization,
            load_format="mistral",
            config_format="mistral",
            max_model_len=max_model_len
        )

    @staticmethod
    def _detect_mime_type(base64_image):
        """Guess the image MIME type from the leading bytes of base64 data.

        Falls back to ``image/jpeg`` when the signature is not recognised.
        """
        # 16 base64 characters decode to the first 12 bytes, enough for the
        # signatures checked below.
        header = base64.b64decode(base64_image[:16])
        if header.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if header.startswith(b"GIF8"):
            return "image/gif"
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return "image/webp"
        return "image/jpeg"

    @staticmethod
    def _user_message(prompt, image_url=None):
        """Build a one-element message list with the prompt and an optional image."""
        content = [{"type": "text", "text": prompt}]
        if image_url is not None:
            content.append({"type": "image_url", "image_url": {"url": image_url}})
        return [{"role": "user", "content": content}]

    def _chat(self, messages):
        """Send ``messages`` to the model and return the text of the first completion."""
        outputs = self.llm.chat(messages, sampling_params=self.sampling_params)
        return outputs[0].outputs[0].text

    def generate_message_from_image(self, prompt, image_path):
        """Answer ``prompt`` about the image stored at ``image_path``.

        The image is embedded as a base64 data URL whose MIME type is taken
        from the file signature instead of always being declared as JPEG.
        """
        base64_image = encode_image(image_path)
        mime_type = self._detect_mime_type(base64_image)
        image_url = f"data:{mime_type};base64,{base64_image}"
        return self._chat(self._user_message(prompt, image_url))

    def generate_message(self, prompt):
        """Answer a text-only ``prompt``."""
        return self._chat(self._user_message(prompt))

src/pipelines.py ADDED Viewed

	@@ -0,0 +1,149 @@

+from src.RAG import RAG
+from src.model import Pixtral
+from src.prompts import GENERATE_INVOICE_PROMPT, GENERATE_BRIEF_DAMAGE_DESCRIPTION_PROMPT, \
+    GENERATE_DETAILED_DAMAGE_DESCRIPTION_PROMPT
+from md2pdf.core import md2pdf
+from rerankers import Reranker
+import re
+from fuzzywuzzy import fuzz
class InvoiceGenerator:
    """Generate a repair invoice for a damage photo.

    Combines the Pixtral model (damage descriptions and invoice text) with
    the RAG retriever (a similar reference invoice) and post-processes the
    generated markdown table.
    """

    def __init__(
            self,
            fais_index_path,
            image_invoice_index_path,
            path_to_invoices,
            path_to_images,
            reranker_model=None,
            device="cuda",
            max_model_len=4096, max_tokens=2048, gpu_memory_utilization=0.95
    ):
        """Load the language model, the optional reranker and the retriever.

        Args:
            fais_index_path: Path of the FAISS index file.
            image_invoice_index_path: CSV mapping index rows to invoices.
            path_to_invoices: Directory containing the invoice PDFs.
            path_to_images: Directory containing the indexed images.
            reranker_model: Optional reranker model name; no reranking when
                omitted.
            device: Device used for the reranker.
            max_model_len: Context length for the language model.
            max_tokens: Maximum number of generated tokens per answer.
            gpu_memory_utilization: GPU memory fraction for the language model.
        """
        self.model = Pixtral(max_model_len=max_model_len, max_tokens=max_tokens,
                             gpu_memory_utilization=gpu_memory_utilization)
        # Defined unconditionally: it is passed to RAG below even when no
        # reranker model was requested.
        self.reranker = Reranker(model_name=reranker_model, device=device) if reranker_model else None
        self.device = device
        self.rag = RAG(
            fais_index_path=fais_index_path,
            image_invoice_index_path=image_invoice_index_path,
            path_to_invoices=path_to_invoices,
            path_to_images=path_to_images,
            reranker=self.reranker
        )
        self.path_to_invoices = path_to_invoices
        self.path_to_images = path_to_images

    def format_invoice(self, generated_invoice, output_path, template_path="data/template.md"):
        """Insert the invoice table into the markdown template and render a PDF."""
        # Explicit encoding: the content is German and must not depend on the
        # platform default.
        with open(template_path, "r", encoding="utf-8") as f:
            md_text = f.read()
        md_text = md_text.replace(r"<<table>>", generated_invoice)
        md2pdf(output_path, md_content=md_text)

    @staticmethod
    def _cost_line_bounds(all_lines):
        """Return the slice bounds ``(first, last)`` of the item rows.

        The table is expected to start with two non-item lines and end with
        the total line; a blank first or last line shifts the bound by one.
        """
        first_cost_line = 3 if all_lines[0] == '' else 2
        last_cost_line = -2 if all_lines[-1] == '' else -1
        return first_cost_line, last_cost_line

    @staticmethod
    def _split_cells(row):
        """Split a pipe-separated row into stripped cells."""
        return [cell.strip() for cell in row.split("|")]

    @staticmethod
    def _to_number(cell):
        """Parse a numeric cell, accepting a decimal comma as well as a point."""
        return float(cell.replace(",", "."))

    @staticmethod
    def _format_number(value):
        """Render whole numbers without decimals and others with two decimals."""
        return str(int(value)) if float(value).is_integer() else f"{value:.2f}"

    @staticmethod
    def check_within_range(generated_invoice, car_parts):
        """Compare each invoice item with the reference data in ``car_parts``.

        Args:
            generated_invoice: Invoice table as pipe-separated text.
            car_parts: Mapping of part name to a dict with ``cost_range``,
                ``hours_range``, ``average_cost`` and ``average_hours``.

        Returns:
            Dict keyed by the part name used in the invoice.  Matched parts
            map to the range checks and differences from the averages,
            unmatched parts map to an empty dict.
        """
        def get_part_info(part_name, car_parts):
            # Best fuzzy match with a score of at least 90, or None.
            part_name = part_name.lower()
            best_part, best_ratio = None, 0
            for part in car_parts:
                ratio = fuzz.WRatio(part_name, part.lower())
                if ratio >= 90 and ratio > best_ratio:
                    best_part, best_ratio = part, ratio
            return best_part

        all_lines = generated_invoice.split("\n")
        first_cost_line, last_cost_line = InvoiceGenerator._cost_line_bounds(all_lines)
        comparing_results = {}
        for row in all_lines[first_cost_line:last_cost_line]:
            cells = InvoiceGenerator._split_cells(row)
            part, cost, hours = cells[0], cells[1], cells[2]
            found_part = get_part_info(part, car_parts)
            if not found_part:
                comparing_results[part] = {}
                continue
            reference = car_parts[found_part]
            cost_value = InvoiceGenerator._to_number(cost)
            hours_value = InvoiceGenerator._to_number(hours)
            comparing_results[part] = {
                "cost_within_range": reference["cost_range"][0] <= cost_value <= reference["cost_range"][1],
                "hours_within_range": reference["hours_range"][0] <= hours_value <= reference["hours_range"][1],
                "cost_diff": cost_value - reference["average_cost"],
                "hours_diff": hours_value - reference["average_hours"],
                "part_info": found_part
            }
        return comparing_results

    @staticmethod
    def check_calculations(generated_invoice):
        """Recompute the per-item totals and the grand total of an invoice.

        Every item row is rewritten as ``part | cost | hours | rate | total``
        with ``total = cost + hours * rate``, and each number on the total
        line is replaced by the sum of the item totals.  Decimal values are
        accepted; whole-number results are written without decimals.

        Args:
            generated_invoice: Invoice table as pipe-separated text.

        Returns:
            The invoice text with corrected totals.
        """
        to_number = InvoiceGenerator._to_number
        format_number = InvoiceGenerator._format_number
        all_lines = generated_invoice.split("\n")
        first_cost_line, last_cost_line = InvoiceGenerator._cost_line_bounds(all_lines)
        rows = [InvoiceGenerator._split_cells(row) for row in all_lines[first_cost_line:last_cost_line]]
        costs = [to_number(row[1]) + to_number(row[2]) * to_number(row[3]) for row in rows]
        # A number with decimals or separators is replaced as one token, so
        # "1234.56" does not turn into two copies of the total.
        all_lines[last_cost_line] = re.sub(
            r"\d+(?:[.,]\d+)*", format_number(sum(costs)), all_lines[last_cost_line]
        )
        all_lines[first_cost_line:last_cost_line] = [
            " | ".join([*row[:4], format_number(cost)]) for row, cost in zip(rows, costs)
        ]
        return "\n".join(all_lines)

    def generate_invoice(self, image_path, output_path=None, template_path="data/template.md", car_parts=None):
        """Run the full pipeline for one damage photo.

        Args:
            image_path: Path of the photo to process.
            output_path: When given, the invoice is also rendered to this PDF.
            template_path: Markdown template used for the PDF.
            car_parts: Optional reference data for ``check_within_range``.

        Returns:
            None when the model answers "Irrelevant." or with fewer than five
            words; otherwise a dict with the damage descriptions, the
            reference invoice and the generated invoice (plus
            ``comparing_results`` when ``car_parts`` is given).
        """
        result = {}
        damage_description = self.model.generate_message_from_image(
            GENERATE_BRIEF_DAMAGE_DESCRIPTION_PROMPT, image_path
        )
        if damage_description == "Irrelevant." or len(damage_description.split()) < 5:
            return None
        result["damage_description"] = damage_description
        print("Damage Description:", damage_description)
        invoice_info, invoice_path = self.rag.find_invoice(
            image_path=image_path, return_only_path=False, damage_description=damage_description, k=5
        )
        # Only the top-ranked reference invoice is used.
        invoice_info = invoice_info[0]
        invoice_path = invoice_path[0]
        result["invoice_info"] = invoice_info
        result["invoice_path"] = invoice_path
        result["similar_image"] = invoice_path.replace(".pdf", ".png")
        print("Invoice Path:", invoice_path)
        detailed_damage_description = self.model.generate_message_from_image(
            GENERATE_DETAILED_DAMAGE_DESCRIPTION_PROMPT, image_path
        )
        result["detailed_damage_description"] = detailed_damage_description
        print("Detailed Damage Description:", detailed_damage_description)
        # Strip markdown code fences the model may wrap around the table.
        generated_invoice = self.model.generate_message_from_image(
            GENERATE_INVOICE_PROMPT(invoice_info, detailed_damage_description), image_path
        ).replace("```markdown", "").replace("```", "")
        generated_invoice = self.check_calculations(generated_invoice)
        result["generated_invoice"] = generated_invoice
        if car_parts:
            comparing_results = self.check_within_range(generated_invoice, car_parts)
            result["comparing_results"] = comparing_results
            print(comparing_results)
        if output_path:
            self.format_invoice(generated_invoice=generated_invoice, output_path=output_path,
                                template_path=template_path)
        return result

src/prompts.py ADDED Viewed

	@@ -0,0 +1,72 @@

# Callable prompt template: GENERATE_INVOICE_PROMPT(reference_invoice, damage_description)
# fills {0} with the reference invoice and {1} with the damage description.
GENERATE_INVOICE_PROMPT = """
Given an image of a car accident, damages description, and an example of invoice for a similar car accident case, generate a repair invoice specifically for the provided image.
The invoice should include details on the parts needing replacement, labor hours, and costs. Structure it as a standard repair invoice typical for Bavaria Direct, in German.
Include only repair and cost information, with no contact details or extra data.
Important: Use the example invoice as a reference only. Do not copy it directly, but you can use labor costs and part names as a reference, if they are relevant to the image.
The invoice should include a list of items, where each item has the following fields:
Beschädigtes Teil (Damaged Part)
Teilkosten (Part Cost, EUR)
Arbeitsstunden (Labor Hours)
Arbeitskosten (Labor Cost per Hour, EUR/Stunde)
Gesamtkosten (Total Cost, EUR)
**Invoice for the similar car accident (for reference only):**
{0}
**Damages description**
{1}
---
**Guidelines for Generation:**
1. **Unique Response**: Generate reasonable variations in part types, costs, and labor hours based on the damage visible in the provided image.
2. **Check Calculations**: Ensure that **Gesamtkosten** for each item is calculated as:
   \\[
   Gesamtkosten = Teilkosten + (Arbeitsstunden x Arbeitskosten)
   \\]
   The **Gesamtsumme** should reflect the sum of all **Gesamtkosten** entries.
Generate a detailed and realistic invoice tailored specifically to the image, without replicating the example.
OUTPUT REQUIREMENTS:
Your output should be a table in markdown format WITHOUT ANY ADDITIONAL COMMENTS. The format of a table is provided in reference invoice example.
""".format
# Prompt for the short damage description that gates the pipeline.
# The literal answer "Irrelevant." requested here is what
# InvoiceGenerator.generate_invoice compares the model output against, so the
# wording of that sentinel must stay in sync with that check.
GENERATE_BRIEF_DAMAGE_DESCRIPTION_PROMPT = """
You will be provided with an image of a car accident.
If the provided photo is not a photo of a car accident or a damaged car (the picture should look like "First Notice of Loss" photo), you must write "Irrelevant." and nothing else.
If a car is completely destroyed and it is not possible to repair it, you must write "Irrelevant." and nothing else and stop following the instructions.
Otherwise, provide a brief description of the damage.
The description should include the type of damage and the affected area of the vehicle. Pay special attention to the parts of the vehicle that are damaged. Be concise and specific, focusing on the visible damage in the image.
Write no more than 1-2 sentences describing the damage in detail.
Do not add any additional information beyond the visible damage in the image and any comments.
"""
# Prompt for the longer damage description that is fed into invoice generation.
GENERATE_DETAILED_DAMAGE_DESCRIPTION_PROMPT = """
Given an image of a car accident, provide a detailed and concise description of all visible damage.
The description should include the type of damage, the affected area of the vehicle, and any other relevant details.
Be thorough and specific, covering all visible damage in the image.
Write no more than 3-4 sentences describing the damage in detail.
Do not add any additional information beyond the visible damage in the image and any comments.
"""
# Prompt asking for a cost and labor-hour estimate per car part, returned as a
# Python dictionary.
# NOTE(review): not referenced by the pipeline code shown in this commit --
# confirm where it is used.
ESTIMATE_COST_OF_CAR_PARTS_REPLACEMENT_PROMPT = """
I will provide you with a list of car parts that need to be replaced due to damage in a car accident (in German).
Your task is to estimate the cost of each part in Euros (EUR) based on the damage description provided.
Also, you need to estimate approximate labor hours required to replace each part.
There may be duplicate parts in the list (for example, for right and left sides of the car).
You may also expand the list with new parts which are not in the list but may be necessary for the repair.
Delete any duplicate entries and provide the cost and labor hours for one side only.
Sort the parts in the list in ascending order based on name of each part (in alphabetical order).
Your output should be a python dictionary where the keys are the names of the parts and the values are lists containing the estimated cost and labor hours for each part.
"""