Spaces:
Sleeping
Sleeping
File size: 2,777 Bytes
0eb6419 6427929 0eb6419 6427929 0eb6419 e0db20e 315655d 0eb6419 315655d e0db20e 6427929 0eb6419 315655d 0eb6419 315655d 0eb6419 908fcb9 0eb6419 315655d e0db20e 315655d 0eb6419 315655d 0eb6419 315655d 6427929 0eb6419 e0db20e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import os
from glob import glob
import openai
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
# Load variables from a .env file into the process environment before the
# key is read below.
load_dotenv()

# Hand the key to the openai module; the value is None when the variable is unset.
api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = api_key
def base_model_chatbot(messages):
    """Answer a conversation with the plain chat model (no document retrieval).

    A fixed system message is placed in front of the caller's messages before
    the request is sent.

    Args:
        messages: Conversation so far, as an iterable of chat-message dicts
            (``{"role": ..., "content": ...}``). It is not modified.

    Returns:
        The text content of the first completion choice.
    """
    system_message = [
        {"role": "system", "content": "You are a helpful AI chatbot that provides clear, complete, and coherent responses to User's questions. Ensure your answers are in full sentences."}
    ]
    # list(...) lets callers pass any iterable (e.g. a tuple), not only a list.
    messages = system_message + list(messages)
    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 interface of the
    # openai package -- confirm the pinned version still provides it.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    return response.choices[0].message['content']
class VectorDB:
    """Class to manage document loading and vector database creation."""

    def __init__(self, docs_directory: str):
        """Remember the directory that is searched for PDF files.

        Args:
            docs_directory: Directory whose ``*.pdf`` files are indexed
                (top level only; sub-directories are not searched).
        """
        self.docs_directory = docs_directory

    def create_vector_db(self):
        """Load every PDF in the directory, split it into chunks, and index it.

        Returns:
            The vector store returned by ``Chroma.from_documents`` for the
            chunked documents, embedded with ``OpenAIEmbeddings``.
        """
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        # glob() yields files in arbitrary order; sort so documents are loaded
        # in a reproducible order from run to run.
        files = sorted(glob(os.path.join(self.docs_directory, "*.pdf")))
        loaders = [PyPDFLoader(pdf_file) for pdf_file in files]
        # Flatten the per-file document lists into one list.
        pdf_docs = [doc for loader in loaders for doc in loader.load()]
        chunks = text_splitter.split_documents(pdf_docs)
        return Chroma.from_documents(chunks, OpenAIEmbeddings())
class ConversationalRetrievalChain:
    """Class to manage the QA chain setup."""

    # Instruction delivered to the model as the system message of the QA prompt.
    SYSTEM_INSTRUCTION = "You are a knowledgeable AI that answers questions based on provided documents. Always give responses in clear, complete sentences."

    def __init__(self, model_name="gpt-3.5-turbo", temperature=0):
        """Store the chat-model settings used when the chain is built.

        Args:
            model_name: Name of the chat model passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.
        """
        self.model_name = model_name
        self.temperature = temperature

    def create_chain(self):
        """Build a ``RetrievalQA`` chain over the PDFs found in ``docs/``.

        Returns:
            The chain produced by ``RetrievalQA.from_chain_type``; it is called
            with ``{"query": ...}`` and its answer is under ``'result'``.
        """
        # Imported here so this block does not depend on a change to the
        # module-level import list.
        from langchain.prompts import (
            ChatPromptTemplate,
            HumanMessagePromptTemplate,
            SystemMessagePromptTemplate,
        )

        # ChatOpenAI does not define a `system_prompt` parameter, so the
        # instruction is supplied through the chain's prompt instead.
        model = ChatOpenAI(
            model_name=self.model_name,
            temperature=self.temperature,
        )
        # The default "stuff" QA chain fills `{context}` with the retrieved
        # documents and `{question}` with the user's query.
        prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(
                self.SYSTEM_INSTRUCTION
                + "\nUse the following pieces of context to answer the user's question."
                + "\nIf you don't know the answer, just say that you don't know."
                + "\n----------------\n{context}"
            ),
            HumanMessagePromptTemplate.from_template("{question}"),
        ])
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        vector_db = VectorDB('docs/')
        retriever = vector_db.create_vector_db().as_retriever(search_type="similarity", search_kwargs={"k": 2})
        return RetrievalQA.from_chain_type(
            llm=model,
            retriever=retriever,
            memory=memory,
            chain_type_kwargs={"prompt": prompt},
        )
def with_pdf_chatbot(messages):
    """Answer the latest message using the PDF-backed retrieval QA chain.

    Args:
        messages: Non-empty sequence of chat-message dicts; only the
            ``'content'`` of the last one is used, stripped of surrounding
            whitespace, as the query.

    Returns:
        The ``'result'`` entry of the chain's output.

    Raises:
        IndexError: If ``messages`` is empty.
    """
    # Extract the query first so bad input fails before the chain is built.
    query = messages[-1]['content'].strip()
    # NOTE(review): a new chain is created on every call, which re-runs
    # VectorDB.create_vector_db() and starts with empty memory each time --
    # consider building it once and reusing it.
    qa_chain = ConversationalRetrievalChain().create_chain()
    result = qa_chain({"query": query})
    return result['result']
|