import os
from typing import List, Dict, Tuple, Optional, cast
from pydantic import SecretStr
from _utils.LLMs.LLM_class import LLM
from _utils.vector_stores.Vector_store_class import VectorStore
from setup.easy_imports import (
Chroma,
ChatOpenAI,
PromptTemplate,
BM25Okapi,
Response,
HuggingFaceEmbeddings,
)
import logging
from _utils.gerar_relatorio_modelo_usuario.DocumentSummarizer_simples import (
DocumentSummarizer,
)
from _utils.models.gerar_relatorio import (
RetrievalConfig,
)
from cohere import Client
from _utils.splitters.Splitter_class import Splitter
class GerarDocumento:
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
cohere_api_key = os.environ.get("COHERE_API_KEY", "")
resumo_gerado = ""
def __init__(
self,
config: RetrievalConfig,
embedding_model,
chunk_size,
chunk_overlap,
num_k_rerank,
model_cohere_rerank,
# prompt_auxiliar,
gpt_model,
gpt_temperature,
# id_modelo_do_usuario,
prompt_gerar_documento,
reciprocal_rank_fusion,
):
self.config = config
self.logger = logging.getLogger(__name__)
# self.prompt_auxiliar = prompt_auxiliar
self.gpt_model = gpt_model
self.gpt_temperature = gpt_temperature
self.prompt_gerar_documento = prompt_gerar_documento
self.reciprocal_rank_fusion = reciprocal_rank_fusion
self.openai_api_key = self.openai_api_key
self.cohere_client = Client(self.cohere_api_key)
self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
self.num_k_rerank = num_k_rerank
self.model_cohere_rerank = model_cohere_rerank
self.splitter = Splitter(chunk_size, chunk_overlap)
self.vector_store = VectorStore(embedding_model)
    def retrieve_with_rank_fusion(
        self, vector_store: Chroma, bm25: BM25Okapi, chunk_ids: List[str], query: str
    ) -> List[Tuple[str, float]]:
"""Combine embedding and BM25 retrieval results"""
try:
# Get embedding results
embedding_results = vector_store.similarity_search_with_score(
query, k=self.config.num_chunks
)
            # Convert embedding results to (chunk_id, score) pairs; Chroma returns
            # distance-style scores (lower is better), so 1 / (1 + score) turns them
            # into similarity-style scores where higher is better
embedding_list = [
(doc.metadata["chunk_id"], 1 / (1 + score))
for doc, score in embedding_results
]
# Get BM25 results
tokenized_query = query.split()
bm25_scores = bm25.get_scores(tokenized_query)
# Convert BM25 scores to list of (chunk_id, score)
bm25_list = [
(chunk_ids[i], float(score)) for i, score in enumerate(bm25_scores)
]
# Sort bm25_list by score in descending order and limit to top N results
bm25_list = sorted(bm25_list, key=lambda x: x[1], reverse=True)[
: self.config.num_chunks
]
            # Normalize BM25 scores. Use default=0 so an empty result list does not
            # raise, and fall back to 1 when the maximum is 0 so the normalization
            # never divides by zero
            calculo_max = max((score for _, score in bm25_list), default=0)
            max_bm25 = calculo_max if calculo_max else 1
            bm25_list = [(doc_id, score / max_bm25) for doc_id, score in bm25_list]
# Pass the lists to rank fusion
result_lists = [embedding_list, bm25_list]
weights = [self.config.embedding_weight, self.config.bm25_weight]
combined_results = self.reciprocal_rank_fusion(
result_lists, weights=weights
)
return combined_results
except Exception as e:
self.logger.error(f"Error in rank fusion retrieval: {str(e)}")
raise
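    # `self.reciprocal_rank_fusion` is injected via __init__; it is expected to take a
    # list of ranked (chunk_id, score) lists plus per-list weights and return a single
    # fused (chunk_id, score) list, highest score first (see the illustrative sketch
    # at the end of this file).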
def rank_fusion_get_top_results(
self,
vector_store: Chroma,
bm25: BM25Okapi,
chunk_ids: List[str],
query: str = "Summarize the main points of this document",
):
# Get combined results using rank fusion
ranked_results = self.retrieve_with_rank_fusion(
vector_store, bm25, chunk_ids, query
)
# Prepare context and track sources
contexts = []
sources = []
# Get full documents for top results
for chunk_id, score in ranked_results[: self.config.num_chunks]:
results = vector_store.get(
where={"chunk_id": chunk_id}, include=["documents", "metadatas"]
)
if results["documents"]:
context = results["documents"][0]
metadata = results["metadatas"][0]
contexts.append(context)
sources.append(
{
"content": context,
"page": metadata["page"],
"chunk_id": chunk_id,
"relevance_score": score,
"context": metadata.get("context", ""),
}
)
return sources, contexts
    def select_model_for_last_requests(self, llm_ultimas_requests: str):
        llm_instance = LLM()
        if llm_ultimas_requests == "gpt-4o-mini":
            llm = ChatOpenAI(
                temperature=self.gpt_temperature,
                model=self.gpt_model,
                api_key=SecretStr(self.openai_api_key),
            )
        elif llm_ultimas_requests == "deepseek-chat":
            llm = llm_instance.deepseek()
        elif llm_ultimas_requests == "gemini-2.0-flash":
            llm = llm_instance.google_gemini("gemini-2.0-flash")
        else:
            # Fail fast on an unknown model name instead of letting `llm` be unbound
            raise ValueError(
                f"Unsupported llm_ultimas_requests value: {llm_ultimas_requests!r}"
            )
        return llm
async def gerar_documento_final(
self,
vector_store: Chroma,
bm25: BM25Okapi,
chunk_ids: List[str],
llm_ultimas_requests: str,
query: str = "Summarize the main points of this document",
) -> List[Dict]:
try:
sources, contexts = self.rank_fusion_get_top_results(
vector_store, bm25, chunk_ids, query
)
llm = self.select_model_for_last_requests(llm_ultimas_requests)
# prompt_auxiliar = PromptTemplate(
# template=self.prompt_auxiliar, input_variables=["context"]
# )
# resumo_auxiliar_do_documento = llm.invoke(
# prompt_auxiliar.format(context="\n\n".join(contexts))
# )
# self.resumo_gerado = cast(str, resumo_auxiliar_do_documento.content)
prompt_gerar_documento = PromptTemplate(
template=self.prompt_gerar_documento,
input_variables=["context"],
)
documento_gerado = cast(
str,
llm.invoke(
prompt_gerar_documento.format(
context="\n\n".join(contexts),
# modelo_usuario=serializer.data["modelo"],
)
).content,
)
# Split the response into paragraphs
summaries = [p.strip() for p in documento_gerado.split("\n\n") if p.strip()]
# Create structured output
structured_output = []
for idx, summary in enumerate(summaries):
source_idx = min(idx, len(sources) - 1)
structured_output.append(
{
"content": summary,
"source": {
"page": sources[source_idx]["page"],
"text": sources[source_idx]["content"][:200] + "...",
"context": sources[source_idx]["context"],
"relevance_score": sources[source_idx]["relevance_score"],
"chunk_id": sources[source_idx]["chunk_id"],
},
}
)
return structured_output
except Exception as e:
self.logger.error(f"Error generating enhanced summary: {str(e)}")
raise
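
# Illustrative sketch of the weighted reciprocal rank fusion callable this class
# expects to receive. The real implementation is injected via __init__ and may differ;
# this version only shows a shape compatible with how retrieve_with_rank_fusion calls
# it: a list of ranked (chunk_id, score) lists plus per-list weights, fused into one
# (chunk_id, score) list. The constant k=60 is the value commonly used for RRF and is
# an assumption here.
def reciprocal_rank_fusion_sketch(
    result_lists: List[List[Tuple[str, float]]],
    weights: Optional[List[float]] = None,
    k: int = 60,
) -> List[Tuple[str, float]]:
    """Fuse several ranked (doc_id, score) lists into a single ranked list."""
    if weights is None:
        weights = [1.0] * len(result_lists)
    fused_scores: Dict[str, float] = {}
    for result_list, weight in zip(result_lists, weights):
        # Rank positions are taken from each list sorted by its own score
        ranked = sorted(result_list, key=lambda pair: pair[1], reverse=True)
        for rank, (doc_id, _score) in enumerate(ranked):
            # Each list contributes weight / (k + rank) to the document's fused score
            fused_scores[doc_id] = fused_scores.get(doc_id, 0.0) + weight / (k + rank)
    # Highest fused score first, matching the ordering rank_fusion_get_top_results expects
    return sorted(fused_scores.items(), key=lambda pair: pair[1], reverse=True)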