rag_with_inline_citations

Sleeping

App Files Files Community

Adrien committed on Jan 25

Commit

cc3f1e1

1 Parent(s): 9b37798

feat: add inline sources and query classifier

Browse files

Files changed (37) hide show

.gitignore +5 -0
README.md +1 -1
pyproject.toml +1 -0
rag_demo/__pycache__/__init__.cpython-312.pyc +0 -0
rag_demo/__pycache__/pipeline.cpython-312.pyc +0 -0
rag_demo/infra/__pycache__/qdrant.cpython-312.pyc +0 -0
rag_demo/preprocessing/__pycache__/__init__.cpython-312.pyc +0 -0
rag_demo/preprocessing/__pycache__/chunking.cpython-312.pyc +0 -0
rag_demo/preprocessing/__pycache__/embed.cpython-312.pyc +0 -0
rag_demo/preprocessing/__pycache__/load_to_vectordb.cpython-312.pyc +0 -0
rag_demo/preprocessing/__pycache__/pdf_conversion.cpython-312.pyc +0 -0
rag_demo/preprocessing/base/__pycache__/__init__.cpython-312.pyc +0 -0
rag_demo/preprocessing/base/__pycache__/chunk.cpython-312.pyc +0 -0
rag_demo/preprocessing/base/__pycache__/document.cpython-312.pyc +0 -0
rag_demo/preprocessing/base/__pycache__/embedded_chunk.cpython-312.pyc +0 -0
rag_demo/preprocessing/base/__pycache__/vectordb.cpython-312.pyc +0 -0
rag_demo/preprocessing/base/vectordb.py +1 -1
rag_demo/preprocessing/embed.py +7 -5
rag_demo/rag/__pycache__/context_generator.cpython-312.pyc +0 -0
rag_demo/rag/__pycache__/prompt_templates.cpython-312.pyc +0 -0
rag_demo/rag/__pycache__/query_classifier.cpython-312.pyc +0 -0
rag_demo/rag/__pycache__/query_expansion.cpython-312.pyc +0 -0
rag_demo/rag/__pycache__/reranker.cpython-312.pyc +0 -0
rag_demo/rag/__pycache__/retriever.cpython-312.pyc +0 -0
rag_demo/rag/__pycache__/source_annotator.cpython-312.pyc +0 -0
rag_demo/rag/base/__pycache__/__init__.cpython-312.pyc +0 -0
rag_demo/rag/base/__pycache__/query.cpython-312.pyc +0 -0
rag_demo/rag/base/__pycache__/template_factory.cpython-312.pyc +0 -0
rag_demo/rag/prompt_templates.py +3 -3
rag_demo/rag/query_classifier.py +27 -0
rag_demo/rag/query_expansion.py +7 -9
rag_demo/rag/reranker.py +1 -0
rag_demo/rag/retriever.py +40 -17
rag_demo/rag/source_annotator.py +43 -0
rag_demo/settings.py +2 -3
templates/chat.html +0 -3
uv.lock +77 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.env
+*/.env
+.venv/
+.mypy_cache/
+data/*

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: matriv-rag-demo
 colorFrom: blue
 colorTo: red
 sdk: docker

 ---
+title: rag-with-inline-citations
 colorFrom: blue
 colorTo: red
 sdk: docker

pyproject.toml CHANGED Viewed

@@ -15,4 +15,5 @@ dependencies = [
     "uvicorn>=0.32.1",
     "huggingface-hub>=0.26.3",
     "llama-parse>=0.5.17",
 ]

     "uvicorn>=0.32.1",
     "huggingface-hub>=0.26.3",
     "llama-parse>=0.5.17",
+    "openai>=1.60.1",
 ]

rag_demo/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (250 Bytes). View file

rag_demo/__pycache__/pipeline.cpython-312.pyc ADDED Viewed

Binary file (624 Bytes). View file

rag_demo/infra/__pycache__/qdrant.cpython-312.pyc ADDED Viewed

Binary file (1.22 kB). View file

rag_demo/preprocessing/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (392 Bytes). View file

rag_demo/preprocessing/__pycache__/chunking.cpython-312.pyc ADDED Viewed

Binary file (1.11 kB). View file

rag_demo/preprocessing/__pycache__/embed.cpython-312.pyc ADDED Viewed

Binary file (3.24 kB). View file

rag_demo/preprocessing/__pycache__/load_to_vectordb.cpython-312.pyc ADDED Viewed

Binary file (2.1 kB). View file

rag_demo/preprocessing/__pycache__/pdf_conversion.cpython-312.pyc ADDED Viewed

Binary file (1.8 kB). View file

rag_demo/preprocessing/base/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (454 Bytes). View file

rag_demo/preprocessing/base/__pycache__/chunk.cpython-312.pyc ADDED Viewed

Binary file (769 Bytes). View file

rag_demo/preprocessing/base/__pycache__/document.cpython-312.pyc ADDED Viewed

Binary file (921 Bytes). View file

rag_demo/preprocessing/base/__pycache__/embedded_chunk.cpython-312.pyc ADDED Viewed

Binary file (1.83 kB). View file

rag_demo/preprocessing/base/__pycache__/vectordb.cpython-312.pyc ADDED Viewed

Binary file (14.2 kB). View file

rag_demo/preprocessing/base/vectordb.py CHANGED Viewed

@@ -15,7 +15,7 @@ from rag_demo.infra.qdrant import connection
 T = TypeVar("T", bound="VectorBaseDocument")
-EMBEDDING_SIZE = 1024
 class VectorBaseDocument(BaseModel, Generic[T], ABC):

 T = TypeVar("T", bound="VectorBaseDocument")
+EMBEDDING_SIZE = 1536
 class VectorBaseDocument(BaseModel, Generic[T], ABC):

rag_demo/preprocessing/embed.py CHANGED Viewed

@@ -8,6 +8,7 @@ import os
 from dotenv import load_dotenv
 from uuid import uuid4
 from loguru import logger
 load_dotenv()
@@ -17,10 +18,7 @@ def batch(list_: list, size: int) -> Generator[list, None, None]:
 def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
-    api = InferenceClient(
-        model="intfloat/multilingual-e5-large-instruct",
-        token=os.getenv("HF_API_TOKEN"),
-    )
     logger.info(f"Embedding {len(chunks)} chunks")
     embedded_chunks = []
     for chunk in chunks:
@@ -29,7 +27,11 @@ def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
                 EmbeddedChunk(
                     id=uuid4(),
                     content=chunk.content,
-                    embedding=api.feature_extraction(chunk.content),
                     document_id=chunk.document_id,
                     chunk_id=chunk.chunk_id,
                     metadata=chunk.metadata,

 from dotenv import load_dotenv
 from uuid import uuid4
 from loguru import logger
+from openai import OpenAI
 load_dotenv()
 def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
+    api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
     logger.info(f"Embedding {len(chunks)} chunks")
     embedded_chunks = []
     for chunk in chunks:
                 EmbeddedChunk(
                     id=uuid4(),
                     content=chunk.content,
+                    embedding=api.embeddings.create(
+                        model="text-embedding-3-small", input=chunk.content
+                    )
+                    .data[0]
+                    .embedding,
                     document_id=chunk.document_id,
                     chunk_id=chunk.chunk_id,
                     metadata=chunk.metadata,

rag_demo/rag/__pycache__/context_generator.cpython-312.pyc ADDED Viewed

Binary file (2.8 kB). View file

rag_demo/rag/__pycache__/prompt_templates.cpython-312.pyc ADDED Viewed

Binary file (2.33 kB). View file

rag_demo/rag/__pycache__/query_classifier.cpython-312.pyc ADDED Viewed

Binary file (1.83 kB). View file

rag_demo/rag/__pycache__/query_expansion.cpython-312.pyc ADDED Viewed

Binary file (2.09 kB). View file

rag_demo/rag/__pycache__/reranker.cpython-312.pyc ADDED Viewed

Binary file (1.62 kB). View file

rag_demo/rag/__pycache__/retriever.cpython-312.pyc ADDED Viewed

Binary file (8.09 kB). View file

rag_demo/rag/__pycache__/source_annotator.cpython-312.pyc ADDED Viewed

Binary file (2.77 kB). View file

rag_demo/rag/base/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (270 Bytes). View file

rag_demo/rag/base/__pycache__/query.cpython-312.pyc ADDED Viewed

Binary file (1.75 kB). View file

rag_demo/rag/base/__pycache__/template_factory.cpython-312.pyc ADDED Viewed

Binary file (1.37 kB). View file

rag_demo/rag/prompt_templates.py CHANGED Viewed

@@ -29,10 +29,10 @@ class QueryExpansionTemplate(PromptTemplateFactory):
 class AnswerGenerationTemplate(PromptTemplateFactory):
     prompt: str = """You are an AI language model assistant. Your task is to generate an answer to the given user question based on the provided context.
     Context: {context}
-    Question: {question}
-    Give your answer in markdown format if needed, for example if a table is the best way to answer the question, or if titles and subheadings are needed.
-    Give only your answer, do not include any other text like 'Certainly! Here is the answer:' or 'The answer is:' or anything similar."""
     def create_template(self, context: str, question: str) -> str:
         return self.prompt.format(context=context, question=question)

 class AnswerGenerationTemplate(PromptTemplateFactory):
     prompt: str = """You are an AI language model assistant. Your task is to generate an answer to the given user question based on the provided context.
     Context: {context}
+    Question: {question}"""
+    # Give only your answer, do not include any other text like 'Certainly! Here is the answer:' or 'The answer is:' or anything similar.
+    # Give your answer in markdown format if needed, for example if a table is the best way to answer the question, or if titles and subheadings are needed.
     def create_template(self, context: str, question: str) -> str:
         return self.prompt.format(context=context, question=question)

rag_demo/rag/query_classifier.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import os
+from typing import Any
+from openai import OpenAI
+from rag_demo.rag.base.query import Query
+from rag_demo.rag.base.template_factory import RAGStep
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from loguru import logger
+import torch
+model_name = (
+    "AdrienB134/greetings-classifier"  # Model trained on English greetings only
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+class QueryClassifier(RAGStep):
+    def generate(self, query: Query) -> Any:
+        if self._mock:
+            return "Sources_needed"
+        with torch.no_grad():
+            inputs = tokenizer(query.content, return_tensors="pt")
+            logits = model(**inputs).logits
+            predictions = logits.argmax()
+        return model.config.id2label[predictions.item()]

rag_demo/rag/query_expansion.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import os
 from typing import Any
-from huggingface_hub import InferenceClient
 from rag_demo.rag.base.query import Query
 from rag_demo.rag.base.template_factory import RAGStep
@@ -10,14 +9,12 @@ from rag_demo.rag.prompt_templates import QueryExpansionTemplate
 class QueryExpansion(RAGStep):
     def generate(self, query: Query, expand_to_n: int) -> Any:
-        api = InferenceClient(
-            model="Qwen/Qwen2.5-72B-Instruct",
-            token=os.getenv("HF_API_TOKEN"),
-        )
         query_expansion_template = QueryExpansionTemplate()
         prompt = query_expansion_template.create_template(expand_to_n - 1)
-        response = api.chat_completion(
-            [
                 {
                     "role": "user",
                     "content": prompt.template.format(
@@ -26,7 +23,8 @@ class QueryExpansion(RAGStep):
                         separator=query_expansion_template.separator,
                     ),
                 }
-            ]
         )
         result = response.choices[0].message.content
         queries_content = result.split(query_expansion_template.separator)

 import os
 from typing import Any
+from openai import OpenAI
 from rag_demo.rag.base.query import Query
 from rag_demo.rag.base.template_factory import RAGStep
 class QueryExpansion(RAGStep):
     def generate(self, query: Query, expand_to_n: int) -> Any:
+        api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
         query_expansion_template = QueryExpansionTemplate()
         prompt = query_expansion_template.create_template(expand_to_n - 1)
+        response = api.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
                 {
                     "role": "user",
                     "content": prompt.template.format(
                         separator=query_expansion_template.separator,
                     ),
                 }
+            ],
+            max_tokens=8192,
         )
         result = response.choices[0].message.content
         queries_content = result.split(query_expansion_template.separator)

rag_demo/rag/reranker.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 from huggingface_hub import InferenceClient
 from rag_demo.rag.base.query import Query
 from rag_demo.rag.base.template_factory import RAGStep
 from rag_demo.preprocessing.embed import EmbeddedChunk

 from huggingface_hub import InferenceClient
 from rag_demo.rag.base.query import Query
 from rag_demo.rag.base.template_factory import RAGStep
 from rag_demo.preprocessing.embed import EmbeddedChunk

rag_demo/rag/retriever.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 from loguru import logger
 from qdrant_client.models import FieldCondition, Filter, MatchValue
-from huggingface_hub import InferenceClient
 from rag_demo.preprocessing.base import (
     EmbeddedChunk,
@@ -13,7 +13,8 @@ from rag_demo.rag.base.query import EmbeddedQuery, Query
 from .query_expansion import QueryExpansion
 from .reranker import Reranker
 from .prompt_templates import AnswerGenerationTemplate
 from dotenv import load_dotenv
 load_dotenv()
@@ -29,6 +30,8 @@ class RAGPipeline:
     def __init__(self, mock: bool = False) -> None:
         self._query_expander = QueryExpansion(mock=mock)
         self._reranker = Reranker(mock=mock)
     def search(
         self,
@@ -77,12 +80,13 @@ class RAGPipeline:
                 limit=k,
             )
-        api = InferenceClient(
-            model="intfloat/multilingual-e5-large-instruct",
-            token=os.getenv("HF_API_TOKEN"),
-        )
         embedded_query: EmbeddedQuery = EmbeddedQuery(
-            embedding=api.feature_extraction(query.content),
             id=query.id,
             content=query.content,
         )
@@ -111,23 +115,42 @@ class RAGPipeline:
         for chunk in reranked_chunks:
             context += "\n Document: "
             context += chunk.content
-        api = InferenceClient(
-            model="meta-llama/Llama-3.3-70B-Instruct",
-            token=os.getenv("HF_API_TOKEN"),
-        )
         answer_generation_template = AnswerGenerationTemplate()
         prompt = answer_generation_template.create_template(context, query)
         logger.info(prompt)
-        response = api.chat_completion(
-            [{"role": "user", "content": prompt}],
             max_tokens=8192,
         )
         return response.choices[0].message.content
     def rag(self, query: str) -> tuple[str, list[str]]:
-        docs = self.search(query, k=10)
-        reranked_docs = self.rerank(query, docs, keep_top_k=10)
         return (
-            self.generate_answer(query, reranked_docs),
-            list(set([doc.metadata["filename"].split(".pdf")[0] for doc in reranked_docs])),
         )

 from loguru import logger
 from qdrant_client.models import FieldCondition, Filter, MatchValue
+from openai import OpenAI
 from rag_demo.preprocessing.base import (
     EmbeddedChunk,
 from .query_expansion import QueryExpansion
 from .reranker import Reranker
 from .prompt_templates import AnswerGenerationTemplate
+from .source_annotator import SourceAnnotator
+from .query_classifier import QueryClassifier
 from dotenv import load_dotenv
 load_dotenv()
     def __init__(self, mock: bool = False) -> None:
         self._query_expander = QueryExpansion(mock=mock)
         self._reranker = Reranker(mock=mock)
+        self._source_annotator = SourceAnnotator()
+        self._query_classifier = QueryClassifier(mock=mock)
     def search(
         self,
                 limit=k,
             )
+        api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
         embedded_query: EmbeddedQuery = EmbeddedQuery(
+            embedding=api.embeddings.create(
+                model="text-embedding-3-small", input=query.content
+            )
+            .data[0]
+            .embedding,
             id=query.id,
             content=query.content,
         )
         for chunk in reranked_chunks:
             context += "\n Document: "
             context += chunk.content
+        api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
         answer_generation_template = AnswerGenerationTemplate()
         prompt = answer_generation_template.create_template(context, query)
         logger.info(prompt)
+        response = api.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[{"role": "user", "content": prompt}],
             max_tokens=8192,
         )
         return response.choices[0].message.content
+    def add_context(self, response: str, reranked_chunks: list[EmbeddedChunk]) -> str:
+        logger.info("Adding context to the answer")
+        return self._source_annotator.annotate(response, reranked_chunks)
     def rag(self, query: str) -> tuple[str, list[str]]:
+        query_type = self._query_classifier.generate(Query.from_str(query))
+        logger.info(f"Query type: {query_type}")
+        if query_type == "Sources_needed":
+            docs = self.search(query, k=10)
+            reranked_docs = self.rerank(query, docs, keep_top_k=10)
+        else:
+            reranked_docs = []
+        answer = self.generate_answer(query, reranked_docs)
+        if reranked_docs:
+            annotated_answer = self.add_context(answer, reranked_docs)
+        else:
+            annotated_answer = answer
         return (
+            annotated_answer,
+            list(
+                set(
+                    [doc.metadata["filename"].split(".pdf")[0] for doc in reranked_docs]
+                )
+            ),
         )

rag_demo/rag/source_annotator.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import os
+from typing import Any
+import re
+from loguru import logger
+from rag_demo.preprocessing.embed import EmbeddedChunk
+from transformers import pipeline
+class SourceAnnotator:
+    def __init__(self):
+        self.source_annotator = pipeline(
+            "question-answering",
+            model="distilbert/distilbert-base-cased-distilled-squad",
+        )
+    def annotate(self, response: str, reranked_chunks: list[EmbeddedChunk]) -> str:
+        sentences = self.split_sentences(response)
+        annotated_response = ""
+        for sentence in sentences:
+            scores = []
+            for chunk in reranked_chunks:
+                score = self.annotate_source(sentence.lower(), chunk.content.lower())
+                score["filename"] = chunk.metadata["filename"].split(".pdf")[0]
+                score["chunk_id"] = chunk.chunk_id
+                scores.append(score)
+            # Could also use a score cut-off instead of max()
+            max_score = max(scores, key=lambda x: x["score"])
+            annotated_response += f"{sentence} [filename: {max_score['filename']}, chunk_id: {max_score['chunk_id']}] "
+        return annotated_response
+    def split_sentences(self, text: str) -> list[str]:
+        pattern = r"(?<=[.!?])\s+(?=[A-Z])"
+        sentences = re.split(pattern, text)
+        return [s.strip() for s in sentences if s.strip()]
+    def annotate_source(self, text: str, chunk: str) -> dict:
+        return self.source_annotator(text, chunk)

rag_demo/settings.py CHANGED Viewed

@@ -26,10 +26,9 @@ class Settings(BaseSettings):
     @classmethod
     def load_settings(cls) -> "Settings":
         """
-        Tries to load the settings from the ZenML secret store. If the secret does not exist, it initializes the settings from the .env file and default values.
         Returns:
-            Settings: The initialized settings object.
         """
         settings = Settings()

     @classmethod
     def load_settings(cls) -> "Settings":
         """
+        Loads the settings from the .env file.
         Returns:
+                Settings: The initialized settings object.
         """
         settings = Settings()

templates/chat.html CHANGED Viewed

@@ -268,9 +268,6 @@
     </div>
     <div class="main-container">
         <div class="chat-card">
-            <div class="logo-container">
-                <img src="./static/Matriv-white.png" alt="Matriv Logo" style="width: 100px; height: auto;">
-            </div>
             <div class="chat-container" id="chatContainer">
             </div>
             <div class="input-container">

     </div>
     <div class="main-container">
         <div class="chat-card">
             <div class="chat-container" id="chatContainer">
             </div>
             <div class="input-container">

uv.lock CHANGED Viewed

@@ -271,6 +271,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 },
 ]
 [[package]]
 name = "fastapi"
 version = "0.115.6"
@@ -651,6 +660,53 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
 ]
 [[package]]
 name = "joblib"
 version = "1.4.2"
@@ -1262,6 +1318,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040 },
 ]
 [[package]]
 name = "opencv-python"
 version = "4.10.0.84"
@@ -1714,6 +1789,7 @@ dependencies = [
     { name = "llama-parse" },
     { name = "loguru" },
     { name = "marker-pdf" },
     { name = "pydantic" },
     { name = "python-multipart" },
     { name = "qdrant-client", extra = ["fastembed"] },
@@ -1728,6 +1804,7 @@ requires-dist = [
     { name = "llama-parse", specifier = ">=0.5.17" },
     { name = "loguru", specifier = ">=0.7.2" },
     { name = "marker-pdf", specifier = ">=1.0.2" },
     { name = "pydantic", specifier = ">=2.10.3" },
     { name = "python-multipart", specifier = ">=0.0.19" },
     { name = "qdrant-client", extras = ["fastembed"], specifier = ">=1.12.1" },

     { url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 },
 ]
+[[package]]
+name = "distro"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
+]
 [[package]]
 name = "fastapi"
 version = "0.115.6"
     { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
 ]
+[[package]]
+name = "jiter"
+version = "0.8.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/70/90bc7bd3932e651486861df5c8ffea4ca7c77d28e8532ddefe2abc561a53/jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d", size = 163007 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/b0/c1a7caa7f9dc5f1f6cfa08722867790fe2d3645d6e7170ca280e6e52d163/jiter-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b", size = 303666 },
+    { url = "https://files.pythonhosted.org/packages/f5/97/0468bc9eeae43079aaa5feb9267964e496bf13133d469cfdc135498f8dd0/jiter-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15", size = 311934 },
+    { url = "https://files.pythonhosted.org/packages/e5/69/64058e18263d9a5f1e10f90c436853616d5f047d997c37c7b2df11b085ec/jiter-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0", size = 335506 },
+    { url = "https://files.pythonhosted.org/packages/9d/14/b747f9a77b8c0542141d77ca1e2a7523e854754af2c339ac89a8b66527d6/jiter-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f", size = 355849 },
+    { url = "https://files.pythonhosted.org/packages/53/e2/98a08161db7cc9d0e39bc385415890928ff09709034982f48eccfca40733/jiter-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099", size = 381700 },
+    { url = "https://files.pythonhosted.org/packages/7a/38/1674672954d35bce3b1c9af99d5849f9256ac8f5b672e020ac7821581206/jiter-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74", size = 389710 },
+    { url = "https://files.pythonhosted.org/packages/f8/9b/92f9da9a9e107d019bcf883cd9125fa1690079f323f5a9d5c6986eeec3c0/jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586", size = 345553 },
+    { url = "https://files.pythonhosted.org/packages/44/a6/6d030003394e9659cd0d7136bbeabd82e869849ceccddc34d40abbbbb269/jiter-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc", size = 376388 },
+    { url = "https://files.pythonhosted.org/packages/ad/8d/87b09e648e4aca5f9af89e3ab3cfb93db2d1e633b2f2931ede8dabd9b19a/jiter-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88", size = 511226 },
+    { url = "https://files.pythonhosted.org/packages/77/95/8008ebe4cdc82eac1c97864a8042ca7e383ed67e0ec17bfd03797045c727/jiter-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6", size = 504134 },
+    { url = "https://files.pythonhosted.org/packages/26/0d/3056a74de13e8b2562e4d526de6dac2f65d91ace63a8234deb9284a1d24d/jiter-0.8.2-cp311-cp311-win32.whl", hash = "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44", size = 203103 },
+    { url = "https://files.pythonhosted.org/packages/4e/1e/7f96b798f356e531ffc0f53dd2f37185fac60fae4d6c612bbbd4639b90aa/jiter-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855", size = 206717 },
+    { url = "https://files.pythonhosted.org/packages/a1/17/c8747af8ea4e045f57d6cfd6fc180752cab9bc3de0e8a0c9ca4e8af333b1/jiter-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f", size = 302027 },
+    { url = "https://files.pythonhosted.org/packages/3c/c1/6da849640cd35a41e91085723b76acc818d4b7d92b0b6e5111736ce1dd10/jiter-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44", size = 310326 },
+    { url = "https://files.pythonhosted.org/packages/06/99/a2bf660d8ccffee9ad7ed46b4f860d2108a148d0ea36043fd16f4dc37e94/jiter-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f", size = 334242 },
+    { url = "https://files.pythonhosted.org/packages/a7/5f/cea1c17864828731f11427b9d1ab7f24764dbd9aaf4648a7f851164d2718/jiter-0.8.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60", size = 356654 },
+    { url = "https://files.pythonhosted.org/packages/e9/13/62774b7e5e7f5d5043efe1d0f94ead66e6d0f894ae010adb56b3f788de71/jiter-0.8.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57", size = 379967 },
+    { url = "https://files.pythonhosted.org/packages/ec/fb/096b34c553bb0bd3f2289d5013dcad6074948b8d55212aa13a10d44c5326/jiter-0.8.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e", size = 389252 },
+    { url = "https://files.pythonhosted.org/packages/17/61/beea645c0bf398ced8b199e377b61eb999d8e46e053bb285c91c3d3eaab0/jiter-0.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887", size = 345490 },
+    { url = "https://files.pythonhosted.org/packages/d5/df/834aa17ad5dcc3cf0118821da0a0cf1589ea7db9832589278553640366bc/jiter-0.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d", size = 376991 },
+    { url = "https://files.pythonhosted.org/packages/67/80/87d140399d382fb4ea5b3d56e7ecaa4efdca17cd7411ff904c1517855314/jiter-0.8.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152", size = 510822 },
+    { url = "https://files.pythonhosted.org/packages/5c/37/3394bb47bac1ad2cb0465601f86828a0518d07828a650722e55268cdb7e6/jiter-0.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29", size = 503730 },
+    { url = "https://files.pythonhosted.org/packages/f9/e2/253fc1fa59103bb4e3aa0665d6ceb1818df1cd7bf3eb492c4dad229b1cd4/jiter-0.8.2-cp312-cp312-win32.whl", hash = "sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e", size = 203375 },
+    { url = "https://files.pythonhosted.org/packages/41/69/6d4bbe66b3b3b4507e47aa1dd5d075919ad242b4b1115b3f80eecd443687/jiter-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c", size = 204740 },
+    { url = "https://files.pythonhosted.org/packages/6c/b0/bfa1f6f2c956b948802ef5a021281978bf53b7a6ca54bb126fd88a5d014e/jiter-0.8.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84", size = 301190 },
+    { url = "https://files.pythonhosted.org/packages/a4/8f/396ddb4e292b5ea57e45ade5dc48229556b9044bad29a3b4b2dddeaedd52/jiter-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4", size = 309334 },
+    { url = "https://files.pythonhosted.org/packages/7f/68/805978f2f446fa6362ba0cc2e4489b945695940656edd844e110a61c98f8/jiter-0.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587", size = 333918 },
+    { url = "https://files.pythonhosted.org/packages/b3/99/0f71f7be667c33403fa9706e5b50583ae5106d96fab997fa7e2f38ee8347/jiter-0.8.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c", size = 356057 },
+    { url = "https://files.pythonhosted.org/packages/8d/50/a82796e421a22b699ee4d2ce527e5bcb29471a2351cbdc931819d941a167/jiter-0.8.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18", size = 379790 },
+    { url = "https://files.pythonhosted.org/packages/3c/31/10fb012b00f6d83342ca9e2c9618869ab449f1aa78c8f1b2193a6b49647c/jiter-0.8.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6", size = 388285 },
+    { url = "https://files.pythonhosted.org/packages/c8/81/f15ebf7de57be488aa22944bf4274962aca8092e4f7817f92ffa50d3ee46/jiter-0.8.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef", size = 344764 },
+    { url = "https://files.pythonhosted.org/packages/b3/e8/0cae550d72b48829ba653eb348cdc25f3f06f8a62363723702ec18e7be9c/jiter-0.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1", size = 376620 },
+    { url = "https://files.pythonhosted.org/packages/b8/50/e5478ff9d82534a944c03b63bc217c5f37019d4a34d288db0f079b13c10b/jiter-0.8.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9", size = 510402 },
+    { url = "https://files.pythonhosted.org/packages/8e/1e/3de48bbebbc8f7025bd454cedc8c62378c0e32dd483dece5f4a814a5cb55/jiter-0.8.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05", size = 503018 },
+    { url = "https://files.pythonhosted.org/packages/d5/cd/d5a5501d72a11fe3e5fd65c78c884e5164eefe80077680533919be22d3a3/jiter-0.8.2-cp313-cp313-win32.whl", hash = "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a", size = 203190 },
+    { url = "https://files.pythonhosted.org/packages/51/bf/e5ca301245ba951447e3ad677a02a64a8845b185de2603dabd83e1e4b9c6/jiter-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865", size = 203551 },
+    { url = "https://files.pythonhosted.org/packages/2f/3c/71a491952c37b87d127790dd7a0b1ebea0514c6b6ad30085b16bbe00aee6/jiter-0.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca", size = 308347 },
+    { url = "https://files.pythonhosted.org/packages/a0/4c/c02408042e6a7605ec063daed138e07b982fdb98467deaaf1c90950cf2c6/jiter-0.8.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0", size = 342875 },
+    { url = "https://files.pythonhosted.org/packages/91/61/c80ef80ed8a0a21158e289ef70dac01e351d929a1c30cb0f49be60772547/jiter-0.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566", size = 202374 },
+]
 [[package]]
 name = "joblib"
 version = "1.4.2"
     { url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040 },
 ]
+[[package]]
+name = "openai"
+version = "1.60.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "httpx" },
+    { name = "jiter" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4c/c4/a220c957aa4097f25498770c6eff8f3abd35934a8859e7a78928a8a70846/openai-1.60.1.tar.gz", hash = "sha256:beb1541dfc38b002bd629ab68b0d6fe35b870c5f4311d9bc4404d85af3214d5e", size = 348070 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7a/ad/55b2d03feda5a0adc0a86048dcb7c9863fd24a3726815a04d5669e82e41e/openai-1.60.1-py3-none-any.whl", hash = "sha256:714181ec1c452353d456f143c22db892de7b373e3165063d02a2b798ed575ba1", size = 456110 },
+]
 [[package]]
 name = "opencv-python"
 version = "4.10.0.84"
     { name = "llama-parse" },
     { name = "loguru" },
     { name = "marker-pdf" },
+    { name = "openai" },
     { name = "pydantic" },
     { name = "python-multipart" },
     { name = "qdrant-client", extra = ["fastembed"] },
     { name = "llama-parse", specifier = ">=0.5.17" },
     { name = "loguru", specifier = ">=0.7.2" },
     { name = "marker-pdf", specifier = ">=1.0.2" },
+    { name = "openai", specifier = ">=1.60.1" },
     { name = "pydantic", specifier = ">=2.10.3" },
     { name = "python-multipart", specifier = ">=0.0.19" },
     { name = "qdrant-client", extras = ["fastembed"], specifier = ">=1.12.1" },