Adrien committed on
Commit
cc3f1e1
·
1 Parent(s): 9b37798

feat: add inline sources and query classifier

Browse files
Files changed (37) hide show
  1. .gitignore +5 -0
  2. README.md +1 -1
  3. pyproject.toml +1 -0
  4. rag_demo/__pycache__/__init__.cpython-312.pyc +0 -0
  5. rag_demo/__pycache__/pipeline.cpython-312.pyc +0 -0
  6. rag_demo/infra/__pycache__/qdrant.cpython-312.pyc +0 -0
  7. rag_demo/preprocessing/__pycache__/__init__.cpython-312.pyc +0 -0
  8. rag_demo/preprocessing/__pycache__/chunking.cpython-312.pyc +0 -0
  9. rag_demo/preprocessing/__pycache__/embed.cpython-312.pyc +0 -0
  10. rag_demo/preprocessing/__pycache__/load_to_vectordb.cpython-312.pyc +0 -0
  11. rag_demo/preprocessing/__pycache__/pdf_conversion.cpython-312.pyc +0 -0
  12. rag_demo/preprocessing/base/__pycache__/__init__.cpython-312.pyc +0 -0
  13. rag_demo/preprocessing/base/__pycache__/chunk.cpython-312.pyc +0 -0
  14. rag_demo/preprocessing/base/__pycache__/document.cpython-312.pyc +0 -0
  15. rag_demo/preprocessing/base/__pycache__/embedded_chunk.cpython-312.pyc +0 -0
  16. rag_demo/preprocessing/base/__pycache__/vectordb.cpython-312.pyc +0 -0
  17. rag_demo/preprocessing/base/vectordb.py +1 -1
  18. rag_demo/preprocessing/embed.py +7 -5
  19. rag_demo/rag/__pycache__/context_generator.cpython-312.pyc +0 -0
  20. rag_demo/rag/__pycache__/prompt_templates.cpython-312.pyc +0 -0
  21. rag_demo/rag/__pycache__/query_classifier.cpython-312.pyc +0 -0
  22. rag_demo/rag/__pycache__/query_expansion.cpython-312.pyc +0 -0
  23. rag_demo/rag/__pycache__/reranker.cpython-312.pyc +0 -0
  24. rag_demo/rag/__pycache__/retriever.cpython-312.pyc +0 -0
  25. rag_demo/rag/__pycache__/source_annotator.cpython-312.pyc +0 -0
  26. rag_demo/rag/base/__pycache__/__init__.cpython-312.pyc +0 -0
  27. rag_demo/rag/base/__pycache__/query.cpython-312.pyc +0 -0
  28. rag_demo/rag/base/__pycache__/template_factory.cpython-312.pyc +0 -0
  29. rag_demo/rag/prompt_templates.py +3 -3
  30. rag_demo/rag/query_classifier.py +27 -0
  31. rag_demo/rag/query_expansion.py +7 -9
  32. rag_demo/rag/reranker.py +1 -0
  33. rag_demo/rag/retriever.py +40 -17
  34. rag_demo/rag/source_annotator.py +43 -0
  35. rag_demo/settings.py +2 -3
  36. templates/chat.html +0 -3
  37. uv.lock +77 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .env
2
+ */.env
3
+ .venv/
4
+ .mypy_cache/
5
+ data/*
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: matriv-rag-demo
3
  colorFrom: blue
4
  colorTo: red
5
  sdk: docker
 
1
  ---
2
+ title: rag-with-inline-citations
3
  colorFrom: blue
4
  colorTo: red
5
  sdk: docker
pyproject.toml CHANGED
@@ -15,4 +15,5 @@ dependencies = [
15
  "uvicorn>=0.32.1",
16
  "huggingface-hub>=0.26.3",
17
  "llama-parse>=0.5.17",
 
18
  ]
 
15
  "uvicorn>=0.32.1",
16
  "huggingface-hub>=0.26.3",
17
  "llama-parse>=0.5.17",
18
+ "openai>=1.60.1",
19
  ]
rag_demo/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (250 Bytes). View file
 
rag_demo/__pycache__/pipeline.cpython-312.pyc ADDED
Binary file (624 Bytes). View file
 
rag_demo/infra/__pycache__/qdrant.cpython-312.pyc ADDED
Binary file (1.22 kB). View file
 
rag_demo/preprocessing/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (392 Bytes). View file
 
rag_demo/preprocessing/__pycache__/chunking.cpython-312.pyc ADDED
Binary file (1.11 kB). View file
 
rag_demo/preprocessing/__pycache__/embed.cpython-312.pyc ADDED
Binary file (3.24 kB). View file
 
rag_demo/preprocessing/__pycache__/load_to_vectordb.cpython-312.pyc ADDED
Binary file (2.1 kB). View file
 
rag_demo/preprocessing/__pycache__/pdf_conversion.cpython-312.pyc ADDED
Binary file (1.8 kB). View file
 
rag_demo/preprocessing/base/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (454 Bytes). View file
 
rag_demo/preprocessing/base/__pycache__/chunk.cpython-312.pyc ADDED
Binary file (769 Bytes). View file
 
rag_demo/preprocessing/base/__pycache__/document.cpython-312.pyc ADDED
Binary file (921 Bytes). View file
 
rag_demo/preprocessing/base/__pycache__/embedded_chunk.cpython-312.pyc ADDED
Binary file (1.83 kB). View file
 
rag_demo/preprocessing/base/__pycache__/vectordb.cpython-312.pyc ADDED
Binary file (14.2 kB). View file
 
rag_demo/preprocessing/base/vectordb.py CHANGED
@@ -15,7 +15,7 @@ from rag_demo.infra.qdrant import connection
15
 
16
  T = TypeVar("T", bound="VectorBaseDocument")
17
 
18
- EMBEDDING_SIZE = 1024
19
 
20
 
21
  class VectorBaseDocument(BaseModel, Generic[T], ABC):
 
15
 
16
  T = TypeVar("T", bound="VectorBaseDocument")
17
 
18
+ EMBEDDING_SIZE = 1536
19
 
20
 
21
  class VectorBaseDocument(BaseModel, Generic[T], ABC):
rag_demo/preprocessing/embed.py CHANGED
@@ -8,6 +8,7 @@ import os
8
  from dotenv import load_dotenv
9
  from uuid import uuid4
10
  from loguru import logger
 
11
 
12
  load_dotenv()
13
 
@@ -17,10 +18,7 @@ def batch(list_: list, size: int) -> Generator[list, None, None]:
17
 
18
 
19
  def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
20
- api = InferenceClient(
21
- model="intfloat/multilingual-e5-large-instruct",
22
- token=os.getenv("HF_API_TOKEN"),
23
- )
24
  logger.info(f"Embedding {len(chunks)} chunks")
25
  embedded_chunks = []
26
  for chunk in chunks:
@@ -29,7 +27,11 @@ def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
29
  EmbeddedChunk(
30
  id=uuid4(),
31
  content=chunk.content,
32
- embedding=api.feature_extraction(chunk.content),
 
 
 
 
33
  document_id=chunk.document_id,
34
  chunk_id=chunk.chunk_id,
35
  metadata=chunk.metadata,
 
8
  from dotenv import load_dotenv
9
  from uuid import uuid4
10
  from loguru import logger
11
+ from openai import OpenAI
12
 
13
  load_dotenv()
14
 
 
18
 
19
 
20
  def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
21
+ api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
22
  logger.info(f"Embedding {len(chunks)} chunks")
23
  embedded_chunks = []
24
  for chunk in chunks:
 
27
  EmbeddedChunk(
28
  id=uuid4(),
29
  content=chunk.content,
30
+ embedding=api.embeddings.create(
31
+ model="text-embedding-3-small", input=chunk.content
32
+ )
33
+ .data[0]
34
+ .embedding,
35
  document_id=chunk.document_id,
36
  chunk_id=chunk.chunk_id,
37
  metadata=chunk.metadata,
rag_demo/rag/__pycache__/context_generator.cpython-312.pyc ADDED
Binary file (2.8 kB). View file
 
rag_demo/rag/__pycache__/prompt_templates.cpython-312.pyc ADDED
Binary file (2.33 kB). View file
 
rag_demo/rag/__pycache__/query_classifier.cpython-312.pyc ADDED
Binary file (1.83 kB). View file
 
rag_demo/rag/__pycache__/query_expansion.cpython-312.pyc ADDED
Binary file (2.09 kB). View file
 
rag_demo/rag/__pycache__/reranker.cpython-312.pyc ADDED
Binary file (1.62 kB). View file
 
rag_demo/rag/__pycache__/retriever.cpython-312.pyc ADDED
Binary file (8.09 kB). View file
 
rag_demo/rag/__pycache__/source_annotator.cpython-312.pyc ADDED
Binary file (2.77 kB). View file
 
rag_demo/rag/base/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (270 Bytes). View file
 
rag_demo/rag/base/__pycache__/query.cpython-312.pyc ADDED
Binary file (1.75 kB). View file
 
rag_demo/rag/base/__pycache__/template_factory.cpython-312.pyc ADDED
Binary file (1.37 kB). View file
 
rag_demo/rag/prompt_templates.py CHANGED
@@ -29,10 +29,10 @@ class QueryExpansionTemplate(PromptTemplateFactory):
29
  class AnswerGenerationTemplate(PromptTemplateFactory):
30
  prompt: str = """You are an AI language model assistant. Your task is to generate an answer to the given user question based on the provided context.
31
  Context: {context}
32
- Question: {question}
33
 
34
- Give your answer in markdown format if needed, for example if a table is the best way to answer the question, or if titles and subheadings are needed.
35
- Give only your answer, do not include any other text like 'Certainly! Here is the answer:' or 'The answer is:' or anything similar."""
36
 
37
  def create_template(self, context: str, question: str) -> str:
38
  return self.prompt.format(context=context, question=question)
 
29
  class AnswerGenerationTemplate(PromptTemplateFactory):
30
  prompt: str = """You are an AI language model assistant. Your task is to generate an answer to the given user question based on the provided context.
31
  Context: {context}
32
+ Question: {question}"""
33
 
34
+ # Give only your answer, do not include any other text like 'Certainly! Here is the answer:' or 'The answer is:' or anything similar.
35
+ # Give your answer in markdown format if needed, for example if a table is the best way to answer the question, or if titles and subheadings are needed.
36
 
37
  def create_template(self, context: str, question: str) -> str:
38
  return self.prompt.format(context=context, question=question)
rag_demo/rag/query_classifier.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any
3
+ from openai import OpenAI
4
+
5
+ from rag_demo.rag.base.query import Query
6
+ from rag_demo.rag.base.template_factory import RAGStep
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+ from loguru import logger
9
+ import torch
10
+
11
+ model_name = (
12
+ "AdrienB134/greetings-classifier" # Model trained on English greetings only
13
+ )
14
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
15
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
16
+
17
+
18
+ class QueryClassifier(RAGStep):
19
+ def generate(self, query: Query) -> Any:
20
+ if self._mock:
21
+ return "Sources_needed"
22
+
23
+ with torch.no_grad():
24
+ inputs = tokenizer(query.content, return_tensors="pt")
25
+ logits = model(**inputs).logits
26
+ predictions = logits.argmax()
27
+ return model.config.id2label[predictions.item()]
rag_demo/rag/query_expansion.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  from typing import Any
3
-
4
- from huggingface_hub import InferenceClient
5
 
6
  from rag_demo.rag.base.query import Query
7
  from rag_demo.rag.base.template_factory import RAGStep
@@ -10,14 +9,12 @@ from rag_demo.rag.prompt_templates import QueryExpansionTemplate
10
 
11
  class QueryExpansion(RAGStep):
12
  def generate(self, query: Query, expand_to_n: int) -> Any:
13
- api = InferenceClient(
14
- model="Qwen/Qwen2.5-72B-Instruct",
15
- token=os.getenv("HF_API_TOKEN"),
16
- )
17
  query_expansion_template = QueryExpansionTemplate()
18
  prompt = query_expansion_template.create_template(expand_to_n - 1)
19
- response = api.chat_completion(
20
- [
 
21
  {
22
  "role": "user",
23
  "content": prompt.template.format(
@@ -26,7 +23,8 @@ class QueryExpansion(RAGStep):
26
  separator=query_expansion_template.separator,
27
  ),
28
  }
29
- ]
 
30
  )
31
  result = response.choices[0].message.content
32
  queries_content = result.split(query_expansion_template.separator)
 
1
  import os
2
  from typing import Any
3
+ from openai import OpenAI
 
4
 
5
  from rag_demo.rag.base.query import Query
6
  from rag_demo.rag.base.template_factory import RAGStep
 
9
 
10
  class QueryExpansion(RAGStep):
11
  def generate(self, query: Query, expand_to_n: int) -> Any:
12
+ api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
13
  query_expansion_template = QueryExpansionTemplate()
14
  prompt = query_expansion_template.create_template(expand_to_n - 1)
15
+ response = api.chat.completions.create(
16
+ model="gpt-4o-mini",
17
+ messages=[
18
  {
19
  "role": "user",
20
  "content": prompt.template.format(
 
23
  separator=query_expansion_template.separator,
24
  ),
25
  }
26
+ ],
27
+ max_tokens=8192,
28
  )
29
  result = response.choices[0].message.content
30
  queries_content = result.split(query_expansion_template.separator)
rag_demo/rag/reranker.py CHANGED
@@ -2,6 +2,7 @@ import os
2
 
3
  from huggingface_hub import InferenceClient
4
 
 
5
  from rag_demo.rag.base.query import Query
6
  from rag_demo.rag.base.template_factory import RAGStep
7
  from rag_demo.preprocessing.embed import EmbeddedChunk
 
2
 
3
  from huggingface_hub import InferenceClient
4
 
5
+
6
  from rag_demo.rag.base.query import Query
7
  from rag_demo.rag.base.template_factory import RAGStep
8
  from rag_demo.preprocessing.embed import EmbeddedChunk
rag_demo/rag/retriever.py CHANGED
@@ -3,7 +3,7 @@ import os
3
 
4
  from loguru import logger
5
  from qdrant_client.models import FieldCondition, Filter, MatchValue
6
- from huggingface_hub import InferenceClient
7
 
8
  from rag_demo.preprocessing.base import (
9
  EmbeddedChunk,
@@ -13,7 +13,8 @@ from rag_demo.rag.base.query import EmbeddedQuery, Query
13
  from .query_expansion import QueryExpansion
14
  from .reranker import Reranker
15
  from .prompt_templates import AnswerGenerationTemplate
16
-
 
17
  from dotenv import load_dotenv
18
 
19
  load_dotenv()
@@ -29,6 +30,8 @@ class RAGPipeline:
29
  def __init__(self, mock: bool = False) -> None:
30
  self._query_expander = QueryExpansion(mock=mock)
31
  self._reranker = Reranker(mock=mock)
 
 
32
 
33
  def search(
34
  self,
@@ -77,12 +80,13 @@ class RAGPipeline:
77
  limit=k,
78
  )
79
 
80
- api = InferenceClient(
81
- model="intfloat/multilingual-e5-large-instruct",
82
- token=os.getenv("HF_API_TOKEN"),
83
- )
84
  embedded_query: EmbeddedQuery = EmbeddedQuery(
85
- embedding=api.feature_extraction(query.content),
 
 
 
 
86
  id=query.id,
87
  content=query.content,
88
  )
@@ -111,23 +115,42 @@ class RAGPipeline:
111
  for chunk in reranked_chunks:
112
  context += "\n Document: "
113
  context += chunk.content
114
- api = InferenceClient(
115
- model="meta-llama/Llama-3.3-70B-Instruct",
116
- token=os.getenv("HF_API_TOKEN"),
117
- )
118
  answer_generation_template = AnswerGenerationTemplate()
119
  prompt = answer_generation_template.create_template(context, query)
120
  logger.info(prompt)
121
- response = api.chat_completion(
122
- [{"role": "user", "content": prompt}],
 
123
  max_tokens=8192,
124
  )
125
  return response.choices[0].message.content
126
 
 
 
 
 
127
  def rag(self, query: str) -> tuple[str, list[str]]:
128
- docs = self.search(query, k=10)
129
- reranked_docs = self.rerank(query, docs, keep_top_k=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  return (
131
- self.generate_answer(query, reranked_docs),
132
- list(set([doc.metadata["filename"].split(".pdf")[0] for doc in reranked_docs])),
 
 
 
 
133
  )
 
3
 
4
  from loguru import logger
5
  from qdrant_client.models import FieldCondition, Filter, MatchValue
6
+ from openai import OpenAI
7
 
8
  from rag_demo.preprocessing.base import (
9
  EmbeddedChunk,
 
13
  from .query_expansion import QueryExpansion
14
  from .reranker import Reranker
15
  from .prompt_templates import AnswerGenerationTemplate
16
+ from .source_annotator import SourceAnnotator
17
+ from .query_classifier import QueryClassifier
18
  from dotenv import load_dotenv
19
 
20
  load_dotenv()
 
30
  def __init__(self, mock: bool = False) -> None:
31
  self._query_expander = QueryExpansion(mock=mock)
32
  self._reranker = Reranker(mock=mock)
33
+ self._source_annotator = SourceAnnotator()
34
+ self._query_classifier = QueryClassifier(mock=mock)
35
 
36
  def search(
37
  self,
 
80
  limit=k,
81
  )
82
 
83
+ api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
84
  embedded_query: EmbeddedQuery = EmbeddedQuery(
85
+ embedding=api.embeddings.create(
86
+ model="text-embedding-3-small", input=query.content
87
+ )
88
+ .data[0]
89
+ .embedding,
90
  id=query.id,
91
  content=query.content,
92
  )
 
115
  for chunk in reranked_chunks:
116
  context += "\n Document: "
117
  context += chunk.content
118
+ api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
119
  answer_generation_template = AnswerGenerationTemplate()
120
  prompt = answer_generation_template.create_template(context, query)
121
  logger.info(prompt)
122
+ response = api.chat.completions.create(
123
+ model="gpt-4o-mini",
124
+ messages=[{"role": "user", "content": prompt}],
125
  max_tokens=8192,
126
  )
127
  return response.choices[0].message.content
128
 
129
+ def add_context(self, response: str, reranked_chunks: list[EmbeddedChunk]) -> str:
130
+ logger.info("Adding context to the answer")
131
+ return self._source_annotator.annotate(response, reranked_chunks)
132
+
133
  def rag(self, query: str) -> tuple[str, list[str]]:
134
+ query_type = self._query_classifier.generate(Query.from_str(query))
135
+ logger.info(f"Query type: {query_type}")
136
+ if query_type == "Sources_needed":
137
+ docs = self.search(query, k=10)
138
+ reranked_docs = self.rerank(query, docs, keep_top_k=10)
139
+ else:
140
+ reranked_docs = []
141
+
142
+ answer = self.generate_answer(query, reranked_docs)
143
+
144
+ if reranked_docs:
145
+ annotated_answer = self.add_context(answer, reranked_docs)
146
+ else:
147
+ annotated_answer = answer
148
+
149
  return (
150
+ annotated_answer,
151
+ list(
152
+ set(
153
+ [doc.metadata["filename"].split(".pdf")[0] for doc in reranked_docs]
154
+ )
155
+ ),
156
  )
rag_demo/rag/source_annotator.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any
3
+ import re
4
+ from loguru import logger
5
+
6
+ from rag_demo.preprocessing.embed import EmbeddedChunk
7
+
8
+
9
+ from transformers import pipeline
10
+
11
+
12
+ class SourceAnnotator:
13
+ def __init__(self):
14
+ self.source_annotator = pipeline(
15
+ "question-answering",
16
+ model="distilbert/distilbert-base-cased-distilled-squad",
17
+ )
18
+
19
+ def annotate(self, response: str, reranked_chunks: list[EmbeddedChunk]) -> str:
20
+ sentences = self.split_sentences(response)
21
+ annotated_response = ""
22
+ for sentence in sentences:
23
+ scores = []
24
+ for chunk in reranked_chunks:
25
+ score = self.annotate_source(sentence.lower(), chunk.content.lower())
26
+ score["filename"] = chunk.metadata["filename"].split(".pdf")[0]
27
+ score["chunk_id"] = chunk.chunk_id
28
+ scores.append(score)
29
+
30
+ # Could also use a score cut-off instead of max()
31
+ max_score = max(scores, key=lambda x: x["score"])
32
+
33
+ annotated_response += f"{sentence} [filename: {max_score['filename']}, chunk_id: {max_score['chunk_id']}] "
34
+
35
+ return annotated_response
36
+
37
+ def split_sentences(self, text: str) -> list[str]:
38
+ pattern = r"(?<=[.!?])\s+(?=[A-Z])"
39
+ sentences = re.split(pattern, text)
40
+ return [s.strip() for s in sentences if s.strip()]
41
+
42
+ def annotate_source(self, text: str, chunk: str) -> dict:
43
+ return self.source_annotator(text, chunk)
rag_demo/settings.py CHANGED
@@ -26,10 +26,9 @@ class Settings(BaseSettings):
26
  @classmethod
27
  def load_settings(cls) -> "Settings":
28
  """
29
- Tries to load the settings from the ZenML secret store. If the secret does not exist, it initializes the settings from the .env file and default values.
30
-
31
  Returns:
32
- Settings: The initialized settings object.
33
  """
34
 
35
  settings = Settings()
 
26
  @classmethod
27
  def load_settings(cls) -> "Settings":
28
  """
29
+ Loads the settings from the .env file.
 
30
  Returns:
31
+ Settings: The initialized settings object.
32
  """
33
 
34
  settings = Settings()
templates/chat.html CHANGED
@@ -268,9 +268,6 @@
268
  </div>
269
  <div class="main-container">
270
  <div class="chat-card">
271
- <div class="logo-container">
272
- <img src="./static/Matriv-white.png" alt="Matriv Logo" style="width: 100px; height: auto;">
273
- </div>
274
  <div class="chat-container" id="chatContainer">
275
  </div>
276
  <div class="input-container">
 
268
  </div>
269
  <div class="main-container">
270
  <div class="chat-card">
 
 
 
271
  <div class="chat-container" id="chatContainer">
272
  </div>
273
  <div class="input-container">
uv.lock CHANGED
@@ -271,6 +271,15 @@ wheels = [
271
  { url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 },
272
  ]
273
 
 
 
 
 
 
 
 
 
 
274
  [[package]]
275
  name = "fastapi"
276
  version = "0.115.6"
@@ -651,6 +660,53 @@ wheels = [
651
  { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
652
  ]
653
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
  [[package]]
655
  name = "joblib"
656
  version = "1.4.2"
@@ -1262,6 +1318,25 @@ wheels = [
1262
  { url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040 },
1263
  ]
1264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1265
  [[package]]
1266
  name = "opencv-python"
1267
  version = "4.10.0.84"
@@ -1714,6 +1789,7 @@ dependencies = [
1714
  { name = "llama-parse" },
1715
  { name = "loguru" },
1716
  { name = "marker-pdf" },
 
1717
  { name = "pydantic" },
1718
  { name = "python-multipart" },
1719
  { name = "qdrant-client", extra = ["fastembed"] },
@@ -1728,6 +1804,7 @@ requires-dist = [
1728
  { name = "llama-parse", specifier = ">=0.5.17" },
1729
  { name = "loguru", specifier = ">=0.7.2" },
1730
  { name = "marker-pdf", specifier = ">=1.0.2" },
 
1731
  { name = "pydantic", specifier = ">=2.10.3" },
1732
  { name = "python-multipart", specifier = ">=0.0.19" },
1733
  { name = "qdrant-client", extras = ["fastembed"], specifier = ">=1.12.1" },
 
271
  { url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 },
272
  ]
273
 
274
+ [[package]]
275
+ name = "distro"
276
+ version = "1.9.0"
277
+ source = { registry = "https://pypi.org/simple" }
278
+ sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 }
279
+ wheels = [
280
+ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
281
+ ]
282
+
283
  [[package]]
284
  name = "fastapi"
285
  version = "0.115.6"
 
660
  { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
661
  ]
662
 
663
+ [[package]]
664
+ name = "jiter"
665
+ version = "0.8.2"
666
+ source = { registry = "https://pypi.org/simple" }
667
+ sdist = { url = "https://files.pythonhosted.org/packages/f8/70/90bc7bd3932e651486861df5c8ffea4ca7c77d28e8532ddefe2abc561a53/jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d", size = 163007 }
668
+ wheels = [
669
+ { url = "https://files.pythonhosted.org/packages/cb/b0/c1a7caa7f9dc5f1f6cfa08722867790fe2d3645d6e7170ca280e6e52d163/jiter-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b", size = 303666 },
670
+ { url = "https://files.pythonhosted.org/packages/f5/97/0468bc9eeae43079aaa5feb9267964e496bf13133d469cfdc135498f8dd0/jiter-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15", size = 311934 },
671
+ { url = "https://files.pythonhosted.org/packages/e5/69/64058e18263d9a5f1e10f90c436853616d5f047d997c37c7b2df11b085ec/jiter-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0", size = 335506 },
672
+ { url = "https://files.pythonhosted.org/packages/9d/14/b747f9a77b8c0542141d77ca1e2a7523e854754af2c339ac89a8b66527d6/jiter-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f", size = 355849 },
673
+ { url = "https://files.pythonhosted.org/packages/53/e2/98a08161db7cc9d0e39bc385415890928ff09709034982f48eccfca40733/jiter-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099", size = 381700 },
674
+ { url = "https://files.pythonhosted.org/packages/7a/38/1674672954d35bce3b1c9af99d5849f9256ac8f5b672e020ac7821581206/jiter-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74", size = 389710 },
675
+ { url = "https://files.pythonhosted.org/packages/f8/9b/92f9da9a9e107d019bcf883cd9125fa1690079f323f5a9d5c6986eeec3c0/jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586", size = 345553 },
676
+ { url = "https://files.pythonhosted.org/packages/44/a6/6d030003394e9659cd0d7136bbeabd82e869849ceccddc34d40abbbbb269/jiter-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc", size = 376388 },
677
+ { url = "https://files.pythonhosted.org/packages/ad/8d/87b09e648e4aca5f9af89e3ab3cfb93db2d1e633b2f2931ede8dabd9b19a/jiter-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88", size = 511226 },
678
+ { url = "https://files.pythonhosted.org/packages/77/95/8008ebe4cdc82eac1c97864a8042ca7e383ed67e0ec17bfd03797045c727/jiter-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6", size = 504134 },
679
+ { url = "https://files.pythonhosted.org/packages/26/0d/3056a74de13e8b2562e4d526de6dac2f65d91ace63a8234deb9284a1d24d/jiter-0.8.2-cp311-cp311-win32.whl", hash = "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44", size = 203103 },
680
+ { url = "https://files.pythonhosted.org/packages/4e/1e/7f96b798f356e531ffc0f53dd2f37185fac60fae4d6c612bbbd4639b90aa/jiter-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855", size = 206717 },
681
+ { url = "https://files.pythonhosted.org/packages/a1/17/c8747af8ea4e045f57d6cfd6fc180752cab9bc3de0e8a0c9ca4e8af333b1/jiter-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f", size = 302027 },
682
+ { url = "https://files.pythonhosted.org/packages/3c/c1/6da849640cd35a41e91085723b76acc818d4b7d92b0b6e5111736ce1dd10/jiter-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44", size = 310326 },
683
+ { url = "https://files.pythonhosted.org/packages/06/99/a2bf660d8ccffee9ad7ed46b4f860d2108a148d0ea36043fd16f4dc37e94/jiter-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f", size = 334242 },
684
+ { url = "https://files.pythonhosted.org/packages/a7/5f/cea1c17864828731f11427b9d1ab7f24764dbd9aaf4648a7f851164d2718/jiter-0.8.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60", size = 356654 },
685
+ { url = "https://files.pythonhosted.org/packages/e9/13/62774b7e5e7f5d5043efe1d0f94ead66e6d0f894ae010adb56b3f788de71/jiter-0.8.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57", size = 379967 },
686
+ { url = "https://files.pythonhosted.org/packages/ec/fb/096b34c553bb0bd3f2289d5013dcad6074948b8d55212aa13a10d44c5326/jiter-0.8.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e", size = 389252 },
687
+ { url = "https://files.pythonhosted.org/packages/17/61/beea645c0bf398ced8b199e377b61eb999d8e46e053bb285c91c3d3eaab0/jiter-0.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887", size = 345490 },
688
+ { url = "https://files.pythonhosted.org/packages/d5/df/834aa17ad5dcc3cf0118821da0a0cf1589ea7db9832589278553640366bc/jiter-0.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d", size = 376991 },
689
+ { url = "https://files.pythonhosted.org/packages/67/80/87d140399d382fb4ea5b3d56e7ecaa4efdca17cd7411ff904c1517855314/jiter-0.8.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152", size = 510822 },
690
+ { url = "https://files.pythonhosted.org/packages/5c/37/3394bb47bac1ad2cb0465601f86828a0518d07828a650722e55268cdb7e6/jiter-0.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29", size = 503730 },
691
+ { url = "https://files.pythonhosted.org/packages/f9/e2/253fc1fa59103bb4e3aa0665d6ceb1818df1cd7bf3eb492c4dad229b1cd4/jiter-0.8.2-cp312-cp312-win32.whl", hash = "sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e", size = 203375 },
692
+ { url = "https://files.pythonhosted.org/packages/41/69/6d4bbe66b3b3b4507e47aa1dd5d075919ad242b4b1115b3f80eecd443687/jiter-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c", size = 204740 },
693
+ { url = "https://files.pythonhosted.org/packages/6c/b0/bfa1f6f2c956b948802ef5a021281978bf53b7a6ca54bb126fd88a5d014e/jiter-0.8.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84", size = 301190 },
694
+ { url = "https://files.pythonhosted.org/packages/a4/8f/396ddb4e292b5ea57e45ade5dc48229556b9044bad29a3b4b2dddeaedd52/jiter-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4", size = 309334 },
695
+ { url = "https://files.pythonhosted.org/packages/7f/68/805978f2f446fa6362ba0cc2e4489b945695940656edd844e110a61c98f8/jiter-0.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587", size = 333918 },
696
+ { url = "https://files.pythonhosted.org/packages/b3/99/0f71f7be667c33403fa9706e5b50583ae5106d96fab997fa7e2f38ee8347/jiter-0.8.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c", size = 356057 },
697
+ { url = "https://files.pythonhosted.org/packages/8d/50/a82796e421a22b699ee4d2ce527e5bcb29471a2351cbdc931819d941a167/jiter-0.8.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18", size = 379790 },
698
+ { url = "https://files.pythonhosted.org/packages/3c/31/10fb012b00f6d83342ca9e2c9618869ab449f1aa78c8f1b2193a6b49647c/jiter-0.8.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6", size = 388285 },
699
+ { url = "https://files.pythonhosted.org/packages/c8/81/f15ebf7de57be488aa22944bf4274962aca8092e4f7817f92ffa50d3ee46/jiter-0.8.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef", size = 344764 },
700
+ { url = "https://files.pythonhosted.org/packages/b3/e8/0cae550d72b48829ba653eb348cdc25f3f06f8a62363723702ec18e7be9c/jiter-0.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1", size = 376620 },
701
+ { url = "https://files.pythonhosted.org/packages/b8/50/e5478ff9d82534a944c03b63bc217c5f37019d4a34d288db0f079b13c10b/jiter-0.8.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9", size = 510402 },
702
+ { url = "https://files.pythonhosted.org/packages/8e/1e/3de48bbebbc8f7025bd454cedc8c62378c0e32dd483dece5f4a814a5cb55/jiter-0.8.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05", size = 503018 },
703
+ { url = "https://files.pythonhosted.org/packages/d5/cd/d5a5501d72a11fe3e5fd65c78c884e5164eefe80077680533919be22d3a3/jiter-0.8.2-cp313-cp313-win32.whl", hash = "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a", size = 203190 },
704
+ { url = "https://files.pythonhosted.org/packages/51/bf/e5ca301245ba951447e3ad677a02a64a8845b185de2603dabd83e1e4b9c6/jiter-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865", size = 203551 },
705
+ { url = "https://files.pythonhosted.org/packages/2f/3c/71a491952c37b87d127790dd7a0b1ebea0514c6b6ad30085b16bbe00aee6/jiter-0.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca", size = 308347 },
706
+ { url = "https://files.pythonhosted.org/packages/a0/4c/c02408042e6a7605ec063daed138e07b982fdb98467deaaf1c90950cf2c6/jiter-0.8.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0", size = 342875 },
707
+ { url = "https://files.pythonhosted.org/packages/91/61/c80ef80ed8a0a21158e289ef70dac01e351d929a1c30cb0f49be60772547/jiter-0.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566", size = 202374 },
708
+ ]
709
+
710
  [[package]]
711
  name = "joblib"
712
  version = "1.4.2"
 
1318
  { url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040 },
1319
  ]
1320
 
1321
+ [[package]]
1322
+ name = "openai"
1323
+ version = "1.60.1"
1324
+ source = { registry = "https://pypi.org/simple" }
1325
+ dependencies = [
1326
+ { name = "anyio" },
1327
+ { name = "distro" },
1328
+ { name = "httpx" },
1329
+ { name = "jiter" },
1330
+ { name = "pydantic" },
1331
+ { name = "sniffio" },
1332
+ { name = "tqdm" },
1333
+ { name = "typing-extensions" },
1334
+ ]
1335
+ sdist = { url = "https://files.pythonhosted.org/packages/4c/c4/a220c957aa4097f25498770c6eff8f3abd35934a8859e7a78928a8a70846/openai-1.60.1.tar.gz", hash = "sha256:beb1541dfc38b002bd629ab68b0d6fe35b870c5f4311d9bc4404d85af3214d5e", size = 348070 }
1336
+ wheels = [
1337
+ { url = "https://files.pythonhosted.org/packages/7a/ad/55b2d03feda5a0adc0a86048dcb7c9863fd24a3726815a04d5669e82e41e/openai-1.60.1-py3-none-any.whl", hash = "sha256:714181ec1c452353d456f143c22db892de7b373e3165063d02a2b798ed575ba1", size = 456110 },
1338
+ ]
1339
+
1340
  [[package]]
1341
  name = "opencv-python"
1342
  version = "4.10.0.84"
 
1789
  { name = "llama-parse" },
1790
  { name = "loguru" },
1791
  { name = "marker-pdf" },
1792
+ { name = "openai" },
1793
  { name = "pydantic" },
1794
  { name = "python-multipart" },
1795
  { name = "qdrant-client", extra = ["fastembed"] },
 
1804
  { name = "llama-parse", specifier = ">=0.5.17" },
1805
  { name = "loguru", specifier = ">=0.7.2" },
1806
  { name = "marker-pdf", specifier = ">=1.0.2" },
1807
+ { name = "openai", specifier = ">=1.60.1" },
1808
  { name = "pydantic", specifier = ">=2.10.3" },
1809
  { name = "python-multipart", specifier = ">=0.0.19" },
1810
  { name = "qdrant-client", extras = ["fastembed"], specifier = ">=1.12.1" },