Spaces:
Runtime error
Runtime error
srjosueaaron
committed on
Commit
•
153d162
1
Parent(s):
79e580d
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Importa las bibliotecas necesarias
|
2 |
+
import os
|
3 |
+
import gradio as gr
|
4 |
+
import random
|
5 |
+
import requests
|
6 |
+
from getpass import getpass
|
7 |
+
from langchain.document_loaders import PyPDFLoader
|
8 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
9 |
+
from langchain.embeddings import OpenAIEmbeddings
|
10 |
+
from langchain.vectorstores import Chroma
|
11 |
+
from langchain.chat_models import ChatOpenAI
|
12 |
+
from langchain.chains import RetrievalQA
|
13 |
+
from langchain.chains.conversation.memory import ConversationBufferMemory
|
14 |
+
|
# OpenAI API key configuration.
# Fix: the original hard-coded a placeholder key and unconditionally
# overwrote os.environ, clobbering any real key supplied by the host
# (e.g. a Hugging Face Space secret). Prefer the environment; fall back
# to the placeholder so the variable is still defined either way.
# NOTE(review): never commit a real API key in source.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "COLOCAR APIKEY")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
18 |
+
|
# URLs of the PDF documents to ingest.
urls = [
    # Add your PDF URLs here
]
23 |
+
|
# Download each PDF from `urls` and load its pages into `pdfdoc`.
pdfdoc = []
for i, url in enumerate(urls):
    filename = f"documento{i+1}.pdf"
    # Fix: bound the request and fail loudly on HTTP errors — the original
    # would silently save an error page as a "PDF" and then crash in the loader.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)
    # Fix: the original f-string had no placeholder for the file name.
    print(f"{filename} fue descargado con éxito.")
    loader = PyPDFLoader(filename)
    data = loader.load()
    pdfdoc.extend(data)
35 |
+
|
# Chunk the loaded PDF pages into overlapping text fragments.
# The splitter is only used once, so it is not kept in a named variable.
documents = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=200,
    length_function=len,
).split_documents(pdfdoc)
39 |
+
|
# Embed the chunks and index them in a Chroma vector store.
embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002")
vectorstore = Chroma.from_documents(documents=documents, embedding=embedding_model)
# Retriever that returns the 2 most similar chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
44 |
+
|
# Chat model plus a retrieval QA chain with conversational memory.
chat = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name="gpt-3.5-turbo",
    temperature=0.0,
)
memory = ConversationBufferMemory()
qa_chain = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=retriever,
    memory=memory,
)
49 |
+
|
# Chat entry point used by the Gradio interface.
def chat_with_model(message, history):
    """Answer *message*: try a keyword lookup in the loaded PDFs first,
    falling back to the retrieval QA chain when nothing matches.
    *history* is supplied by Gradio and is not used here."""
    doc_answer = generate_response_from_documents(message, documents)
    if doc_answer != "No se encontró información relevante en los documentos.":
        return doc_answer
    # Nothing matched in the documents — let the LLM chain answer.
    return qa_chain.run(message)
57 |
+
|
# Keyword-based lookup over the split PDF chunks.
def generate_response_from_documents(message, documents):
    """Return a chunk of *documents* containing any whitespace-separated
    word of *message*, or a fixed Spanish "not found" sentence.

    Each element of *documents* is expected to expose ``page_content``
    (str) and ``metadata`` (dict), as langchain ``Document`` objects
    produced by ``PyPDFLoader`` do.
    """
    user_tokens = message.split()
    found_responses = []
    for doc_index, document in enumerate(documents):
        # Bug fix: the original iterated over the Document object itself
        # (`for page_index, page_content in enumerate(document)`), which
        # yields pydantic field tuples rather than page text, so keywords
        # were never matched against the actual content. Read the text and
        # page number from the Document's attributes instead.
        page_content = document.page_content
        if any(keyword in page_content for keyword in user_tokens):
            found_responses.append({
                # PyPDFLoader records "source"/"page" in metadata; fall back
                # to the original synthetic values if they are absent.
                "source": document.metadata.get("source", f"documento{doc_index + 1}.pdf"),
                "page": document.metadata.get("page", 0),
                "content": page_content,
            })
    if found_responses:
        # Pick one matching chunk at random, as the original did.
        response = random.choice(found_responses)
        return f"Encontré información en {response['source']}, página {response['page']}:\n{response['content']}"
    return "No se encontró información relevante en los documentos."
71 |
+
|
# Build the Gradio chat UI, then launch it.
chat_ui = gr.ChatInterface(
    chat_with_model,
    chatbot=gr.Chatbot(height=200),
    textbox=gr.Textbox(placeholder="Hazme una pregunta", container=False, scale=7),
    title="Bienvenido a LechuzoBot",
    description="En que puedo ayudarte el dia de hoy?",
    theme="soft",
    examples=["Que es la Universidad Tecnologica de Tecamac?", "Numero telefonico donde puedo comunicarme"],
    cache_examples=True,
    retry_btn=None,
    undo_btn="Eliminar respuesta anterior",
    clear_btn="Limpiar",
)
chat_ui.launch()