Adrien
committed on
Commit
·
cc3f1e1
1
Parent(s):
9b37798
feat: add inline sources and query classifier
Browse files
- .gitignore +5 -0
- README.md +1 -1
- pyproject.toml +1 -0
- rag_demo/__pycache__/__init__.cpython-312.pyc +0 -0
- rag_demo/__pycache__/pipeline.cpython-312.pyc +0 -0
- rag_demo/infra/__pycache__/qdrant.cpython-312.pyc +0 -0
- rag_demo/preprocessing/__pycache__/__init__.cpython-312.pyc +0 -0
- rag_demo/preprocessing/__pycache__/chunking.cpython-312.pyc +0 -0
- rag_demo/preprocessing/__pycache__/embed.cpython-312.pyc +0 -0
- rag_demo/preprocessing/__pycache__/load_to_vectordb.cpython-312.pyc +0 -0
- rag_demo/preprocessing/__pycache__/pdf_conversion.cpython-312.pyc +0 -0
- rag_demo/preprocessing/base/__pycache__/__init__.cpython-312.pyc +0 -0
- rag_demo/preprocessing/base/__pycache__/chunk.cpython-312.pyc +0 -0
- rag_demo/preprocessing/base/__pycache__/document.cpython-312.pyc +0 -0
- rag_demo/preprocessing/base/__pycache__/embedded_chunk.cpython-312.pyc +0 -0
- rag_demo/preprocessing/base/__pycache__/vectordb.cpython-312.pyc +0 -0
- rag_demo/preprocessing/base/vectordb.py +1 -1
- rag_demo/preprocessing/embed.py +7 -5
- rag_demo/rag/__pycache__/context_generator.cpython-312.pyc +0 -0
- rag_demo/rag/__pycache__/prompt_templates.cpython-312.pyc +0 -0
- rag_demo/rag/__pycache__/query_classifier.cpython-312.pyc +0 -0
- rag_demo/rag/__pycache__/query_expansion.cpython-312.pyc +0 -0
- rag_demo/rag/__pycache__/reranker.cpython-312.pyc +0 -0
- rag_demo/rag/__pycache__/retriever.cpython-312.pyc +0 -0
- rag_demo/rag/__pycache__/source_annotator.cpython-312.pyc +0 -0
- rag_demo/rag/base/__pycache__/__init__.cpython-312.pyc +0 -0
- rag_demo/rag/base/__pycache__/query.cpython-312.pyc +0 -0
- rag_demo/rag/base/__pycache__/template_factory.cpython-312.pyc +0 -0
- rag_demo/rag/prompt_templates.py +3 -3
- rag_demo/rag/query_classifier.py +27 -0
- rag_demo/rag/query_expansion.py +7 -9
- rag_demo/rag/reranker.py +1 -0
- rag_demo/rag/retriever.py +40 -17
- rag_demo/rag/source_annotator.py +43 -0
- rag_demo/settings.py +2 -3
- templates/chat.html +0 -3
- uv.lock +77 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
*/.env
|
3 |
+
.venv/
|
4 |
+
.mypy_cache/
|
5 |
+
data/*
|
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
colorFrom: blue
|
4 |
colorTo: red
|
5 |
sdk: docker
|
|
|
1 |
---
|
2 |
+
title: rag-with-inline-citations
|
3 |
colorFrom: blue
|
4 |
colorTo: red
|
5 |
sdk: docker
|
pyproject.toml
CHANGED
@@ -15,4 +15,5 @@ dependencies = [
|
|
15 |
"uvicorn>=0.32.1",
|
16 |
"huggingface-hub>=0.26.3",
|
17 |
"llama-parse>=0.5.17",
|
|
|
18 |
]
|
|
|
15 |
"uvicorn>=0.32.1",
|
16 |
"huggingface-hub>=0.26.3",
|
17 |
"llama-parse>=0.5.17",
|
18 |
+
"openai>=1.60.1",
|
19 |
]
|
rag_demo/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (250 Bytes). View file
|
|
rag_demo/__pycache__/pipeline.cpython-312.pyc
ADDED
Binary file (624 Bytes). View file
|
|
rag_demo/infra/__pycache__/qdrant.cpython-312.pyc
ADDED
Binary file (1.22 kB). View file
|
|
rag_demo/preprocessing/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (392 Bytes). View file
|
|
rag_demo/preprocessing/__pycache__/chunking.cpython-312.pyc
ADDED
Binary file (1.11 kB). View file
|
|
rag_demo/preprocessing/__pycache__/embed.cpython-312.pyc
ADDED
Binary file (3.24 kB). View file
|
|
rag_demo/preprocessing/__pycache__/load_to_vectordb.cpython-312.pyc
ADDED
Binary file (2.1 kB). View file
|
|
rag_demo/preprocessing/__pycache__/pdf_conversion.cpython-312.pyc
ADDED
Binary file (1.8 kB). View file
|
|
rag_demo/preprocessing/base/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (454 Bytes). View file
|
|
rag_demo/preprocessing/base/__pycache__/chunk.cpython-312.pyc
ADDED
Binary file (769 Bytes). View file
|
|
rag_demo/preprocessing/base/__pycache__/document.cpython-312.pyc
ADDED
Binary file (921 Bytes). View file
|
|
rag_demo/preprocessing/base/__pycache__/embedded_chunk.cpython-312.pyc
ADDED
Binary file (1.83 kB). View file
|
|
rag_demo/preprocessing/base/__pycache__/vectordb.cpython-312.pyc
ADDED
Binary file (14.2 kB). View file
|
|
rag_demo/preprocessing/base/vectordb.py
CHANGED
@@ -15,7 +15,7 @@ from rag_demo.infra.qdrant import connection
|
|
15 |
|
16 |
T = TypeVar("T", bound="VectorBaseDocument")
|
17 |
|
18 |
-
EMBEDDING_SIZE =
|
19 |
|
20 |
|
21 |
class VectorBaseDocument(BaseModel, Generic[T], ABC):
|
|
|
15 |
|
16 |
T = TypeVar("T", bound="VectorBaseDocument")
|
17 |
|
18 |
+
EMBEDDING_SIZE = 1536
|
19 |
|
20 |
|
21 |
class VectorBaseDocument(BaseModel, Generic[T], ABC):
|
rag_demo/preprocessing/embed.py
CHANGED
@@ -8,6 +8,7 @@ import os
|
|
8 |
from dotenv import load_dotenv
|
9 |
from uuid import uuid4
|
10 |
from loguru import logger
|
|
|
11 |
|
12 |
load_dotenv()
|
13 |
|
@@ -17,10 +18,7 @@ def batch(list_: list, size: int) -> Generator[list, None, None]:
|
|
17 |
|
18 |
|
19 |
def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
|
20 |
-
api =
|
21 |
-
model="intfloat/multilingual-e5-large-instruct",
|
22 |
-
token=os.getenv("HF_API_TOKEN"),
|
23 |
-
)
|
24 |
logger.info(f"Embedding {len(chunks)} chunks")
|
25 |
embedded_chunks = []
|
26 |
for chunk in chunks:
|
@@ -29,7 +27,11 @@ def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
|
|
29 |
EmbeddedChunk(
|
30 |
id=uuid4(),
|
31 |
content=chunk.content,
|
32 |
-
embedding=api.
|
|
|
|
|
|
|
|
|
33 |
document_id=chunk.document_id,
|
34 |
chunk_id=chunk.chunk_id,
|
35 |
metadata=chunk.metadata,
|
|
|
8 |
from dotenv import load_dotenv
|
9 |
from uuid import uuid4
|
10 |
from loguru import logger
|
11 |
+
from openai import OpenAI
|
12 |
|
13 |
load_dotenv()
|
14 |
|
|
|
18 |
|
19 |
|
20 |
def embed_chunks(chunks: list[Chunk]) -> list[EmbeddedChunk]:
|
21 |
+
api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
|
|
|
22 |
logger.info(f"Embedding {len(chunks)} chunks")
|
23 |
embedded_chunks = []
|
24 |
for chunk in chunks:
|
|
|
27 |
EmbeddedChunk(
|
28 |
id=uuid4(),
|
29 |
content=chunk.content,
|
30 |
+
embedding=api.embeddings.create(
|
31 |
+
model="text-embedding-3-small", input=chunk.content
|
32 |
+
)
|
33 |
+
.data[0]
|
34 |
+
.embedding,
|
35 |
document_id=chunk.document_id,
|
36 |
chunk_id=chunk.chunk_id,
|
37 |
metadata=chunk.metadata,
|
rag_demo/rag/__pycache__/context_generator.cpython-312.pyc
ADDED
Binary file (2.8 kB). View file
|
|
rag_demo/rag/__pycache__/prompt_templates.cpython-312.pyc
ADDED
Binary file (2.33 kB). View file
|
|
rag_demo/rag/__pycache__/query_classifier.cpython-312.pyc
ADDED
Binary file (1.83 kB). View file
|
|
rag_demo/rag/__pycache__/query_expansion.cpython-312.pyc
ADDED
Binary file (2.09 kB). View file
|
|
rag_demo/rag/__pycache__/reranker.cpython-312.pyc
ADDED
Binary file (1.62 kB). View file
|
|
rag_demo/rag/__pycache__/retriever.cpython-312.pyc
ADDED
Binary file (8.09 kB). View file
|
|
rag_demo/rag/__pycache__/source_annotator.cpython-312.pyc
ADDED
Binary file (2.77 kB). View file
|
|
rag_demo/rag/base/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (270 Bytes). View file
|
|
rag_demo/rag/base/__pycache__/query.cpython-312.pyc
ADDED
Binary file (1.75 kB). View file
|
|
rag_demo/rag/base/__pycache__/template_factory.cpython-312.pyc
ADDED
Binary file (1.37 kB). View file
|
|
rag_demo/rag/prompt_templates.py
CHANGED
@@ -29,10 +29,10 @@ class QueryExpansionTemplate(PromptTemplateFactory):
|
|
29 |
class AnswerGenerationTemplate(PromptTemplateFactory):
|
30 |
prompt: str = """You are an AI language model assistant. Your task is to generate an answer to the given user question based on the provided context.
|
31 |
Context: {context}
|
32 |
-
Question: {question}
|
33 |
|
34 |
-
Give your answer
|
35 |
-
Give
|
36 |
|
37 |
def create_template(self, context: str, question: str) -> str:
|
38 |
return self.prompt.format(context=context, question=question)
|
|
|
29 |
class AnswerGenerationTemplate(PromptTemplateFactory):
|
30 |
prompt: str = """You are an AI language model assistant. Your task is to generate an answer to the given user question based on the provided context.
|
31 |
Context: {context}
|
32 |
+
Question: {question}"""
|
33 |
|
34 |
+
# Give only your answer, do not include any other text like 'Certainly! Here is the answer:' or 'The answer is:' or anything similar.
|
35 |
+
# Give your answer in markdown format if needed, for example if a table is the best way to answer the question, or if titles and subheadings are needed.
|
36 |
|
37 |
def create_template(self, context: str, question: str) -> str:
|
38 |
return self.prompt.format(context=context, question=question)
|
rag_demo/rag/query_classifier.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import Any
|
3 |
+
from openai import OpenAI
|
4 |
+
|
5 |
+
from rag_demo.rag.base.query import Query
|
6 |
+
from rag_demo.rag.base.template_factory import RAGStep
|
7 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
8 |
+
from loguru import logger
|
9 |
+
import torch
|
10 |
+
|
11 |
+
model_name = (
|
12 |
+
"AdrienB134/greetings-classifier" # Model trained on English greetings only
|
13 |
+
)
|
14 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
15 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
16 |
+
|
17 |
+
|
18 |
+
class QueryClassifier(RAGStep):
|
19 |
+
def generate(self, query: Query) -> Any:
|
20 |
+
if self._mock:
|
21 |
+
return "Sources_needed"
|
22 |
+
|
23 |
+
with torch.no_grad():
|
24 |
+
inputs = tokenizer(query.content, return_tensors="pt")
|
25 |
+
logits = model(**inputs).logits
|
26 |
+
predictions = logits.argmax()
|
27 |
+
return model.config.id2label[predictions.item()]
|
rag_demo/rag/query_expansion.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import os
|
2 |
from typing import Any
|
3 |
-
|
4 |
-
from huggingface_hub import InferenceClient
|
5 |
|
6 |
from rag_demo.rag.base.query import Query
|
7 |
from rag_demo.rag.base.template_factory import RAGStep
|
@@ -10,14 +9,12 @@ from rag_demo.rag.prompt_templates import QueryExpansionTemplate
|
|
10 |
|
11 |
class QueryExpansion(RAGStep):
|
12 |
def generate(self, query: Query, expand_to_n: int) -> Any:
|
13 |
-
api =
|
14 |
-
model="Qwen/Qwen2.5-72B-Instruct",
|
15 |
-
token=os.getenv("HF_API_TOKEN"),
|
16 |
-
)
|
17 |
query_expansion_template = QueryExpansionTemplate()
|
18 |
prompt = query_expansion_template.create_template(expand_to_n - 1)
|
19 |
-
response = api.
|
20 |
-
|
|
|
21 |
{
|
22 |
"role": "user",
|
23 |
"content": prompt.template.format(
|
@@ -26,7 +23,8 @@ class QueryExpansion(RAGStep):
|
|
26 |
separator=query_expansion_template.separator,
|
27 |
),
|
28 |
}
|
29 |
-
]
|
|
|
30 |
)
|
31 |
result = response.choices[0].message.content
|
32 |
queries_content = result.split(query_expansion_template.separator)
|
|
|
1 |
import os
|
2 |
from typing import Any
|
3 |
+
from openai import OpenAI
|
|
|
4 |
|
5 |
from rag_demo.rag.base.query import Query
|
6 |
from rag_demo.rag.base.template_factory import RAGStep
|
|
|
9 |
|
10 |
class QueryExpansion(RAGStep):
|
11 |
def generate(self, query: Query, expand_to_n: int) -> Any:
|
12 |
+
api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
|
|
|
13 |
query_expansion_template = QueryExpansionTemplate()
|
14 |
prompt = query_expansion_template.create_template(expand_to_n - 1)
|
15 |
+
response = api.chat.completions.create(
|
16 |
+
model="gpt-4o-mini",
|
17 |
+
messages=[
|
18 |
{
|
19 |
"role": "user",
|
20 |
"content": prompt.template.format(
|
|
|
23 |
separator=query_expansion_template.separator,
|
24 |
),
|
25 |
}
|
26 |
+
],
|
27 |
+
max_tokens=8192,
|
28 |
)
|
29 |
result = response.choices[0].message.content
|
30 |
queries_content = result.split(query_expansion_template.separator)
|
rag_demo/rag/reranker.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
|
3 |
from huggingface_hub import InferenceClient
|
4 |
|
|
|
5 |
from rag_demo.rag.base.query import Query
|
6 |
from rag_demo.rag.base.template_factory import RAGStep
|
7 |
from rag_demo.preprocessing.embed import EmbeddedChunk
|
|
|
2 |
|
3 |
from huggingface_hub import InferenceClient
|
4 |
|
5 |
+
|
6 |
from rag_demo.rag.base.query import Query
|
7 |
from rag_demo.rag.base.template_factory import RAGStep
|
8 |
from rag_demo.preprocessing.embed import EmbeddedChunk
|
rag_demo/rag/retriever.py
CHANGED
@@ -3,7 +3,7 @@ import os
|
|
3 |
|
4 |
from loguru import logger
|
5 |
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
6 |
-
from
|
7 |
|
8 |
from rag_demo.preprocessing.base import (
|
9 |
EmbeddedChunk,
|
@@ -13,7 +13,8 @@ from rag_demo.rag.base.query import EmbeddedQuery, Query
|
|
13 |
from .query_expansion import QueryExpansion
|
14 |
from .reranker import Reranker
|
15 |
from .prompt_templates import AnswerGenerationTemplate
|
16 |
-
|
|
|
17 |
from dotenv import load_dotenv
|
18 |
|
19 |
load_dotenv()
|
@@ -29,6 +30,8 @@ class RAGPipeline:
|
|
29 |
def __init__(self, mock: bool = False) -> None:
|
30 |
self._query_expander = QueryExpansion(mock=mock)
|
31 |
self._reranker = Reranker(mock=mock)
|
|
|
|
|
32 |
|
33 |
def search(
|
34 |
self,
|
@@ -77,12 +80,13 @@ class RAGPipeline:
|
|
77 |
limit=k,
|
78 |
)
|
79 |
|
80 |
-
api =
|
81 |
-
model="intfloat/multilingual-e5-large-instruct",
|
82 |
-
token=os.getenv("HF_API_TOKEN"),
|
83 |
-
)
|
84 |
embedded_query: EmbeddedQuery = EmbeddedQuery(
|
85 |
-
embedding=api.
|
|
|
|
|
|
|
|
|
86 |
id=query.id,
|
87 |
content=query.content,
|
88 |
)
|
@@ -111,23 +115,42 @@ class RAGPipeline:
|
|
111 |
for chunk in reranked_chunks:
|
112 |
context += "\n Document: "
|
113 |
context += chunk.content
|
114 |
-
api =
|
115 |
-
model="meta-llama/Llama-3.3-70B-Instruct",
|
116 |
-
token=os.getenv("HF_API_TOKEN"),
|
117 |
-
)
|
118 |
answer_generation_template = AnswerGenerationTemplate()
|
119 |
prompt = answer_generation_template.create_template(context, query)
|
120 |
logger.info(prompt)
|
121 |
-
response = api.
|
122 |
-
|
|
|
123 |
max_tokens=8192,
|
124 |
)
|
125 |
return response.choices[0].message.content
|
126 |
|
|
|
|
|
|
|
|
|
127 |
def rag(self, query: str) -> tuple[str, list[str]]:
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
return (
|
131 |
-
|
132 |
-
list(
|
|
|
|
|
|
|
|
|
133 |
)
|
|
|
3 |
|
4 |
from loguru import logger
|
5 |
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
6 |
+
from openai import OpenAI
|
7 |
|
8 |
from rag_demo.preprocessing.base import (
|
9 |
EmbeddedChunk,
|
|
|
13 |
from .query_expansion import QueryExpansion
|
14 |
from .reranker import Reranker
|
15 |
from .prompt_templates import AnswerGenerationTemplate
|
16 |
+
from .source_annotator import SourceAnnotator
|
17 |
+
from .query_classifier import QueryClassifier
|
18 |
from dotenv import load_dotenv
|
19 |
|
20 |
load_dotenv()
|
|
|
30 |
def __init__(self, mock: bool = False) -> None:
|
31 |
self._query_expander = QueryExpansion(mock=mock)
|
32 |
self._reranker = Reranker(mock=mock)
|
33 |
+
self._source_annotator = SourceAnnotator()
|
34 |
+
self._query_classifier = QueryClassifier(mock=mock)
|
35 |
|
36 |
def search(
|
37 |
self,
|
|
|
80 |
limit=k,
|
81 |
)
|
82 |
|
83 |
+
api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
|
|
|
84 |
embedded_query: EmbeddedQuery = EmbeddedQuery(
|
85 |
+
embedding=api.embeddings.create(
|
86 |
+
model="text-embedding-3-small", input=query.content
|
87 |
+
)
|
88 |
+
.data[0]
|
89 |
+
.embedding,
|
90 |
id=query.id,
|
91 |
content=query.content,
|
92 |
)
|
|
|
115 |
for chunk in reranked_chunks:
|
116 |
context += "\n Document: "
|
117 |
context += chunk.content
|
118 |
+
api = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
|
|
|
119 |
answer_generation_template = AnswerGenerationTemplate()
|
120 |
prompt = answer_generation_template.create_template(context, query)
|
121 |
logger.info(prompt)
|
122 |
+
response = api.chat.completions.create(
|
123 |
+
model="gpt-4o-mini",
|
124 |
+
messages=[{"role": "user", "content": prompt}],
|
125 |
max_tokens=8192,
|
126 |
)
|
127 |
return response.choices[0].message.content
|
128 |
|
129 |
+
def add_context(self, response: str, reranked_chunks: list[EmbeddedChunk]) -> str:
|
130 |
+
logger.info("Adding context to the answer")
|
131 |
+
return self._source_annotator.annotate(response, reranked_chunks)
|
132 |
+
|
133 |
def rag(self, query: str) -> tuple[str, list[str]]:
|
134 |
+
query_type = self._query_classifier.generate(Query.from_str(query))
|
135 |
+
logger.info(f"Query type: {query_type}")
|
136 |
+
if query_type == "Sources_needed":
|
137 |
+
docs = self.search(query, k=10)
|
138 |
+
reranked_docs = self.rerank(query, docs, keep_top_k=10)
|
139 |
+
else:
|
140 |
+
reranked_docs = []
|
141 |
+
|
142 |
+
answer = self.generate_answer(query, reranked_docs)
|
143 |
+
|
144 |
+
if reranked_docs:
|
145 |
+
annotated_answer = self.add_context(answer, reranked_docs)
|
146 |
+
else:
|
147 |
+
annotated_answer = answer
|
148 |
+
|
149 |
return (
|
150 |
+
annotated_answer,
|
151 |
+
list(
|
152 |
+
set(
|
153 |
+
[doc.metadata["filename"].split(".pdf")[0] for doc in reranked_docs]
|
154 |
+
)
|
155 |
+
),
|
156 |
)
|
rag_demo/rag/source_annotator.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import Any
|
3 |
+
import re
|
4 |
+
from loguru import logger
|
5 |
+
|
6 |
+
from rag_demo.preprocessing.embed import EmbeddedChunk
|
7 |
+
|
8 |
+
|
9 |
+
from transformers import pipeline
|
10 |
+
|
11 |
+
|
12 |
+
class SourceAnnotator:
|
13 |
+
def __init__(self):
|
14 |
+
self.source_annotator = pipeline(
|
15 |
+
"question-answering",
|
16 |
+
model="distilbert/distilbert-base-cased-distilled-squad",
|
17 |
+
)
|
18 |
+
|
19 |
+
def annotate(self, response: str, reranked_chunks: list[EmbeddedChunk]) -> str:
|
20 |
+
sentences = self.split_sentences(response)
|
21 |
+
annotated_response = ""
|
22 |
+
for sentence in sentences:
|
23 |
+
scores = []
|
24 |
+
for chunk in reranked_chunks:
|
25 |
+
score = self.annotate_source(sentence.lower(), chunk.content.lower())
|
26 |
+
score["filename"] = chunk.metadata["filename"].split(".pdf")[0]
|
27 |
+
score["chunk_id"] = chunk.chunk_id
|
28 |
+
scores.append(score)
|
29 |
+
|
30 |
+
# Could also use a score cut-off instead of max()
|
31 |
+
max_score = max(scores, key=lambda x: x["score"])
|
32 |
+
|
33 |
+
annotated_response += f"{sentence} [filename: {max_score['filename']}, chunk_id: {max_score['chunk_id']}] "
|
34 |
+
|
35 |
+
return annotated_response
|
36 |
+
|
37 |
+
def split_sentences(self, text: str) -> list[str]:
|
38 |
+
pattern = r"(?<=[.!?])\s+(?=[A-Z])"
|
39 |
+
sentences = re.split(pattern, text)
|
40 |
+
return [s.strip() for s in sentences if s.strip()]
|
41 |
+
|
42 |
+
def annotate_source(self, text: str, chunk: str) -> dict:
|
43 |
+
return self.source_annotator(text, chunk)
|
rag_demo/settings.py
CHANGED
@@ -26,10 +26,9 @@ class Settings(BaseSettings):
|
|
26 |
@classmethod
|
27 |
def load_settings(cls) -> "Settings":
|
28 |
"""
|
29 |
-
|
30 |
-
|
31 |
Returns:
|
32 |
-
|
33 |
"""
|
34 |
|
35 |
settings = Settings()
|
|
|
26 |
@classmethod
|
27 |
def load_settings(cls) -> "Settings":
|
28 |
"""
|
29 |
+
Loads the settings from the .env file.
|
|
|
30 |
Returns:
|
31 |
+
Settings: The initialized settings object.
|
32 |
"""
|
33 |
|
34 |
settings = Settings()
|
templates/chat.html
CHANGED
@@ -268,9 +268,6 @@
|
|
268 |
</div>
|
269 |
<div class="main-container">
|
270 |
<div class="chat-card">
|
271 |
-
<div class="logo-container">
|
272 |
-
<img src="./static/Matriv-white.png" alt="Matriv Logo" style="width: 100px; height: auto;">
|
273 |
-
</div>
|
274 |
<div class="chat-container" id="chatContainer">
|
275 |
</div>
|
276 |
<div class="input-container">
|
|
|
268 |
</div>
|
269 |
<div class="main-container">
|
270 |
<div class="chat-card">
|
|
|
|
|
|
|
271 |
<div class="chat-container" id="chatContainer">
|
272 |
</div>
|
273 |
<div class="input-container">
|
uv.lock
CHANGED
@@ -271,6 +271,15 @@ wheels = [
|
|
271 |
{ url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 },
|
272 |
]
|
273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
[[package]]
|
275 |
name = "fastapi"
|
276 |
version = "0.115.6"
|
@@ -651,6 +660,53 @@ wheels = [
|
|
651 |
{ url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
|
652 |
]
|
653 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
654 |
[[package]]
|
655 |
name = "joblib"
|
656 |
version = "1.4.2"
|
@@ -1262,6 +1318,25 @@ wheels = [
|
|
1262 |
{ url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040 },
|
1263 |
]
|
1264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1265 |
[[package]]
|
1266 |
name = "opencv-python"
|
1267 |
version = "4.10.0.84"
|
@@ -1714,6 +1789,7 @@ dependencies = [
|
|
1714 |
{ name = "llama-parse" },
|
1715 |
{ name = "loguru" },
|
1716 |
{ name = "marker-pdf" },
|
|
|
1717 |
{ name = "pydantic" },
|
1718 |
{ name = "python-multipart" },
|
1719 |
{ name = "qdrant-client", extra = ["fastembed"] },
|
@@ -1728,6 +1804,7 @@ requires-dist = [
|
|
1728 |
{ name = "llama-parse", specifier = ">=0.5.17" },
|
1729 |
{ name = "loguru", specifier = ">=0.7.2" },
|
1730 |
{ name = "marker-pdf", specifier = ">=1.0.2" },
|
|
|
1731 |
{ name = "pydantic", specifier = ">=2.10.3" },
|
1732 |
{ name = "python-multipart", specifier = ">=0.0.19" },
|
1733 |
{ name = "qdrant-client", extras = ["fastembed"], specifier = ">=1.12.1" },
|
|
|
271 |
{ url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 },
|
272 |
]
|
273 |
|
274 |
+
[[package]]
|
275 |
+
name = "distro"
|
276 |
+
version = "1.9.0"
|
277 |
+
source = { registry = "https://pypi.org/simple" }
|
278 |
+
sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 }
|
279 |
+
wheels = [
|
280 |
+
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
|
281 |
+
]
|
282 |
+
|
283 |
[[package]]
|
284 |
name = "fastapi"
|
285 |
version = "0.115.6"
|
|
|
660 |
{ url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
|
661 |
]
|
662 |
|
663 |
+
[[package]]
|
664 |
+
name = "jiter"
|
665 |
+
version = "0.8.2"
|
666 |
+
source = { registry = "https://pypi.org/simple" }
|
667 |
+
sdist = { url = "https://files.pythonhosted.org/packages/f8/70/90bc7bd3932e651486861df5c8ffea4ca7c77d28e8532ddefe2abc561a53/jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d", size = 163007 }
|
668 |
+
wheels = [
|
669 |
+
{ url = "https://files.pythonhosted.org/packages/cb/b0/c1a7caa7f9dc5f1f6cfa08722867790fe2d3645d6e7170ca280e6e52d163/jiter-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b", size = 303666 },
|
670 |
+
{ url = "https://files.pythonhosted.org/packages/f5/97/0468bc9eeae43079aaa5feb9267964e496bf13133d469cfdc135498f8dd0/jiter-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15", size = 311934 },
|
671 |
+
{ url = "https://files.pythonhosted.org/packages/e5/69/64058e18263d9a5f1e10f90c436853616d5f047d997c37c7b2df11b085ec/jiter-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0", size = 335506 },
|
672 |
+
{ url = "https://files.pythonhosted.org/packages/9d/14/b747f9a77b8c0542141d77ca1e2a7523e854754af2c339ac89a8b66527d6/jiter-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f", size = 355849 },
|
673 |
+
{ url = "https://files.pythonhosted.org/packages/53/e2/98a08161db7cc9d0e39bc385415890928ff09709034982f48eccfca40733/jiter-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099", size = 381700 },
|
674 |
+
{ url = "https://files.pythonhosted.org/packages/7a/38/1674672954d35bce3b1c9af99d5849f9256ac8f5b672e020ac7821581206/jiter-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74", size = 389710 },
|
675 |
+
{ url = "https://files.pythonhosted.org/packages/f8/9b/92f9da9a9e107d019bcf883cd9125fa1690079f323f5a9d5c6986eeec3c0/jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586", size = 345553 },
|
676 |
+
{ url = "https://files.pythonhosted.org/packages/44/a6/6d030003394e9659cd0d7136bbeabd82e869849ceccddc34d40abbbbb269/jiter-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc", size = 376388 },
|
677 |
+
{ url = "https://files.pythonhosted.org/packages/ad/8d/87b09e648e4aca5f9af89e3ab3cfb93db2d1e633b2f2931ede8dabd9b19a/jiter-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88", size = 511226 },
|
678 |
+
{ url = "https://files.pythonhosted.org/packages/77/95/8008ebe4cdc82eac1c97864a8042ca7e383ed67e0ec17bfd03797045c727/jiter-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6", size = 504134 },
|
679 |
+
{ url = "https://files.pythonhosted.org/packages/26/0d/3056a74de13e8b2562e4d526de6dac2f65d91ace63a8234deb9284a1d24d/jiter-0.8.2-cp311-cp311-win32.whl", hash = "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44", size = 203103 },
|
680 |
+
{ url = "https://files.pythonhosted.org/packages/4e/1e/7f96b798f356e531ffc0f53dd2f37185fac60fae4d6c612bbbd4639b90aa/jiter-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855", size = 206717 },
|
681 |
+
{ url = "https://files.pythonhosted.org/packages/a1/17/c8747af8ea4e045f57d6cfd6fc180752cab9bc3de0e8a0c9ca4e8af333b1/jiter-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f", size = 302027 },
|
682 |
+
{ url = "https://files.pythonhosted.org/packages/3c/c1/6da849640cd35a41e91085723b76acc818d4b7d92b0b6e5111736ce1dd10/jiter-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44", size = 310326 },
|
683 |
+
{ url = "https://files.pythonhosted.org/packages/06/99/a2bf660d8ccffee9ad7ed46b4f860d2108a148d0ea36043fd16f4dc37e94/jiter-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f", size = 334242 },
|
684 |
+
{ url = "https://files.pythonhosted.org/packages/a7/5f/cea1c17864828731f11427b9d1ab7f24764dbd9aaf4648a7f851164d2718/jiter-0.8.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60", size = 356654 },
|
685 |
+
{ url = "https://files.pythonhosted.org/packages/e9/13/62774b7e5e7f5d5043efe1d0f94ead66e6d0f894ae010adb56b3f788de71/jiter-0.8.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57", size = 379967 },
|
686 |
+
{ url = "https://files.pythonhosted.org/packages/ec/fb/096b34c553bb0bd3f2289d5013dcad6074948b8d55212aa13a10d44c5326/jiter-0.8.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e", size = 389252 },
|
687 |
+
{ url = "https://files.pythonhosted.org/packages/17/61/beea645c0bf398ced8b199e377b61eb999d8e46e053bb285c91c3d3eaab0/jiter-0.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887", size = 345490 },
|
688 |
+
{ url = "https://files.pythonhosted.org/packages/d5/df/834aa17ad5dcc3cf0118821da0a0cf1589ea7db9832589278553640366bc/jiter-0.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d", size = 376991 },
|
689 |
+
{ url = "https://files.pythonhosted.org/packages/67/80/87d140399d382fb4ea5b3d56e7ecaa4efdca17cd7411ff904c1517855314/jiter-0.8.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152", size = 510822 },
|
690 |
+
{ url = "https://files.pythonhosted.org/packages/5c/37/3394bb47bac1ad2cb0465601f86828a0518d07828a650722e55268cdb7e6/jiter-0.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29", size = 503730 },
|
691 |
+
{ url = "https://files.pythonhosted.org/packages/f9/e2/253fc1fa59103bb4e3aa0665d6ceb1818df1cd7bf3eb492c4dad229b1cd4/jiter-0.8.2-cp312-cp312-win32.whl", hash = "sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e", size = 203375 },
|
692 |
+
{ url = "https://files.pythonhosted.org/packages/41/69/6d4bbe66b3b3b4507e47aa1dd5d075919ad242b4b1115b3f80eecd443687/jiter-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c", size = 204740 },
|
693 |
+
{ url = "https://files.pythonhosted.org/packages/6c/b0/bfa1f6f2c956b948802ef5a021281978bf53b7a6ca54bb126fd88a5d014e/jiter-0.8.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84", size = 301190 },
|
694 |
+
{ url = "https://files.pythonhosted.org/packages/a4/8f/396ddb4e292b5ea57e45ade5dc48229556b9044bad29a3b4b2dddeaedd52/jiter-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4", size = 309334 },
|
695 |
+
{ url = "https://files.pythonhosted.org/packages/7f/68/805978f2f446fa6362ba0cc2e4489b945695940656edd844e110a61c98f8/jiter-0.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587", size = 333918 },
|
696 |
+
{ url = "https://files.pythonhosted.org/packages/b3/99/0f71f7be667c33403fa9706e5b50583ae5106d96fab997fa7e2f38ee8347/jiter-0.8.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c", size = 356057 },
|
697 |
+
{ url = "https://files.pythonhosted.org/packages/8d/50/a82796e421a22b699ee4d2ce527e5bcb29471a2351cbdc931819d941a167/jiter-0.8.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18", size = 379790 },
|
698 |
+
{ url = "https://files.pythonhosted.org/packages/3c/31/10fb012b00f6d83342ca9e2c9618869ab449f1aa78c8f1b2193a6b49647c/jiter-0.8.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6", size = 388285 },
|
699 |
+
{ url = "https://files.pythonhosted.org/packages/c8/81/f15ebf7de57be488aa22944bf4274962aca8092e4f7817f92ffa50d3ee46/jiter-0.8.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef", size = 344764 },
|
700 |
+
{ url = "https://files.pythonhosted.org/packages/b3/e8/0cae550d72b48829ba653eb348cdc25f3f06f8a62363723702ec18e7be9c/jiter-0.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1", size = 376620 },
|
701 |
+
{ url = "https://files.pythonhosted.org/packages/b8/50/e5478ff9d82534a944c03b63bc217c5f37019d4a34d288db0f079b13c10b/jiter-0.8.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9", size = 510402 },
|
702 |
+
{ url = "https://files.pythonhosted.org/packages/8e/1e/3de48bbebbc8f7025bd454cedc8c62378c0e32dd483dece5f4a814a5cb55/jiter-0.8.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05", size = 503018 },
|
703 |
+
{ url = "https://files.pythonhosted.org/packages/d5/cd/d5a5501d72a11fe3e5fd65c78c884e5164eefe80077680533919be22d3a3/jiter-0.8.2-cp313-cp313-win32.whl", hash = "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a", size = 203190 },
|
704 |
+
{ url = "https://files.pythonhosted.org/packages/51/bf/e5ca301245ba951447e3ad677a02a64a8845b185de2603dabd83e1e4b9c6/jiter-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865", size = 203551 },
|
705 |
+
{ url = "https://files.pythonhosted.org/packages/2f/3c/71a491952c37b87d127790dd7a0b1ebea0514c6b6ad30085b16bbe00aee6/jiter-0.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca", size = 308347 },
|
706 |
+
{ url = "https://files.pythonhosted.org/packages/a0/4c/c02408042e6a7605ec063daed138e07b982fdb98467deaaf1c90950cf2c6/jiter-0.8.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0", size = 342875 },
|
707 |
+
{ url = "https://files.pythonhosted.org/packages/91/61/c80ef80ed8a0a21158e289ef70dac01e351d929a1c30cb0f49be60772547/jiter-0.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566", size = 202374 },
|
708 |
+
]
|
709 |
+
|
710 |
[[package]]
|
711 |
name = "joblib"
|
712 |
version = "1.4.2"
|
|
|
1318 |
{ url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040 },
|
1319 |
]
|
1320 |
|
1321 |
+
[[package]]
|
1322 |
+
name = "openai"
|
1323 |
+
version = "1.60.1"
|
1324 |
+
source = { registry = "https://pypi.org/simple" }
|
1325 |
+
dependencies = [
|
1326 |
+
{ name = "anyio" },
|
1327 |
+
{ name = "distro" },
|
1328 |
+
{ name = "httpx" },
|
1329 |
+
{ name = "jiter" },
|
1330 |
+
{ name = "pydantic" },
|
1331 |
+
{ name = "sniffio" },
|
1332 |
+
{ name = "tqdm" },
|
1333 |
+
{ name = "typing-extensions" },
|
1334 |
+
]
|
1335 |
+
sdist = { url = "https://files.pythonhosted.org/packages/4c/c4/a220c957aa4097f25498770c6eff8f3abd35934a8859e7a78928a8a70846/openai-1.60.1.tar.gz", hash = "sha256:beb1541dfc38b002bd629ab68b0d6fe35b870c5f4311d9bc4404d85af3214d5e", size = 348070 }
|
1336 |
+
wheels = [
|
1337 |
+
{ url = "https://files.pythonhosted.org/packages/7a/ad/55b2d03feda5a0adc0a86048dcb7c9863fd24a3726815a04d5669e82e41e/openai-1.60.1-py3-none-any.whl", hash = "sha256:714181ec1c452353d456f143c22db892de7b373e3165063d02a2b798ed575ba1", size = 456110 },
|
1338 |
+
]
|
1339 |
+
|
1340 |
[[package]]
|
1341 |
name = "opencv-python"
|
1342 |
version = "4.10.0.84"
|
|
|
1789 |
{ name = "llama-parse" },
|
1790 |
{ name = "loguru" },
|
1791 |
{ name = "marker-pdf" },
|
1792 |
+
{ name = "openai" },
|
1793 |
{ name = "pydantic" },
|
1794 |
{ name = "python-multipart" },
|
1795 |
{ name = "qdrant-client", extra = ["fastembed"] },
|
|
|
1804 |
{ name = "llama-parse", specifier = ">=0.5.17" },
|
1805 |
{ name = "loguru", specifier = ">=0.7.2" },
|
1806 |
{ name = "marker-pdf", specifier = ">=1.0.2" },
|
1807 |
+
{ name = "openai", specifier = ">=1.60.1" },
|
1808 |
{ name = "pydantic", specifier = ">=2.10.3" },
|
1809 |
{ name = "python-multipart", specifier = ">=0.0.19" },
|
1810 |
{ name = "qdrant-client", extras = ["fastembed"], specifier = ">=1.12.1" },
|