import os
import logging
import sys

from flask import Flask, request, jsonify, Response

# Initialize Flask and route logs to stdout
app = Flask(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    Document,
)

# Global defaults: LLM for generation, embedding model for indexing
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")
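# Both OpenAI classes read the API key from the environment unless one is
# passed explicitly, so OPENAI_API_KEY must be set before this script runs:
#   export OPENAI_API_KEY="sk-..."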
directory_path = "documentos"

# PDFReader can natively return each PDF concatenated into a single document
from llama_index.readers.file import PDFReader
file_extractor = {".pdf": PDFReader(return_full_document=True)}
from drive_downloader import GoogleDriveDownloader

# Drive folder ID and local target path
folder_id = "1n34bmh9rlbOtCvE_WPZRukQilKeabWsN"
local_path = directory_path
GoogleDriveDownloader().download_from_folder(folder_id, local_path)
documents = SimpleDirectoryReader(
    input_dir=directory_path,
    file_extractor=file_extractor,
    filename_as_id=True,
    recursive=True,
).load_data()

# Append one extra document built from the file names in the directory,
# so questions about which files exist can also be answered
from document_creator import create_single_document_with_filenames
document = create_single_document_with_filenames(directory_path=directory_path)
documents.append(document)
# from llama_index.core.ingestion import IngestionPipeline
# The ingestion pipeline will come into use once the metadata extractor is added

from llama_index.core.node_parser import SentenceSplitter
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=128)
nodes = splitter.get_nodes_from_documents(documents)
from llama_index.core.storage.docstore import SimpleDocumentStore
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# Persistent Chroma collection backing the dense (vector) retriever
db = chromadb.PersistentClient(path="chroma_db")
chroma_collection = db.get_or_create_collection("dense_vectors")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(
    docstore=docstore, vector_store=vector_store
)
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context, show_progress=True)
storage_context.docstore.persist("./docstore.json")
index_retriever = index.as_retriever(similarity_top_k=2)
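# On a later run, the persisted stores can be reloaded instead of re-chunking
# and re-embedding everything. A minimal sketch, assuming the same paths and
# collection name as above:
#
#   docstore = SimpleDocumentStore.from_persist_path("./docstore.json")
#   db = chromadb.PersistentClient(path="chroma_db")
#   chroma_collection = db.get_or_create_collection("dense_vectors")
#   vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
#   index = VectorStoreIndex.from_vector_store(vector_store)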
import nest_asyncio
nest_asyncio.apply()

from llama_index.retrievers.bm25 import BM25Retriever
bm25_retriever = BM25Retriever.from_defaults(
    docstore=index.docstore,
    similarity_top_k=2,
    language="portuguese",
    verbose=True,
)
from llama_index.core.retrievers import QueryFusionRetriever
retriever = QueryFusionRetriever(
    [index_retriever, bm25_retriever],
    num_queries=1,  # 1 disables extra query generation
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
)
from llama_index.core.memory import ChatMemoryBuffer
from mysqlchatstore import MySQLChatStore

chat_store = MySQLChatStore.from_params(
    host=os.getenv("MYSQL_HOST"),
    port=os.getenv("MYSQL_PORT"),
    user=os.getenv("MYSQL_USER"),
    password=os.getenv("MYSQL_PASSWORD"),
    database=os.getenv("MYSQL_DATABASE"),
    table_name=os.getenv("MYSQL_TABLE"),
)
chat_memory = ChatMemoryBuffer.from_defaults(
    token_limit=3000,
    chat_store=chat_store,
    chat_store_key="Sicoob",  # still having some trouble passing the per-user key here
)
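# For per-user history, the usual pattern is one chat_store_key per user rather
# than a fixed key. A hypothetical sketch, assuming the client sends a user id
# with each request:
#
#   chat_memory = ChatMemoryBuffer.from_defaults(
#       token_limit=3000,
#       chat_store=chat_store,
#       chat_store_key=f"user-{user_id}",
#   )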
from llama_index.core.query_engine import RetrieverQueryEngine
query_engine = RetrieverQueryEngine.from_args(retriever)

from llama_index.core.chat_engine import CondensePlusContextChatEngine
chat_engine = CondensePlusContextChatEngine.from_defaults(
    query_engine,
    memory=chat_memory,
    context_prompt=(
        "You are a virtual assistant able to interact normally, as well as"
        " provide information about organization charts and list employees."
        " Here are the documents relevant to the context:\n"
        "{context_str}"
        "\nInstruction: Use the previous conversation history, or the context above, to answer."
        " At the end of the answer, after a line break, write the name of the document that"
        " contains the information between two ||, like ||Document Name||"
    ),
)
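# Quick sanity check outside Flask (the question is just an example):
#   print(chat_engine.chat("Who appears in the org chart?"))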
# An @app.route decorator is required for Flask to expose this handler;
# "/chat" is an assumed endpoint path
@app.route("/chat", methods=["POST"])
def chat():
    user_input = request.json.get("message", "")
    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    def generate_response():
        try:
            response = chat_engine.stream_chat(user_input)
            for token in response.response_gen:
                yield token  # stream each token to the client as it arrives
        except Exception as e:
            yield f"Error: {str(e)}"

    return Response(generate_response(), content_type="text/plain")
if __name__ == "__main__":
    app.run(port=5001, debug=False)
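# A minimal way to exercise the endpoint once the server is running; -N keeps
# curl from buffering the streamed tokens (the message is just an example):
#
#   curl -N -X POST http://localhost:5001/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Who appears in the org chart?"}'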