Spaces:
Sleeping
Sleeping
import os | |
from glob import glob | |
import openai | |
from dotenv import load_dotenv | |
from langchain.embeddings import OpenAIEmbeddings | |
from langchain.vectorstores import Chroma | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.chat_models import ChatOpenAI | |
from langchain.chains import RetrievalQA | |
from langchain.memory import ConversationBufferMemory | |
# Pull variables from a local .env file into the process environment, then
# hand the OpenAI key (None when the variable is unset) to the openai module.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = api_key
# Helper function to validate response completeness
def is_response_complete(response: str) -> bool:
    """Return True when *response* ends with sentence-final punctuation.

    Surrounding whitespace is ignored. A response counts as complete when its
    last non-whitespace character is one of ``.``, ``!`` or ``?``.

    Args:
        response: The text produced by the model.

    Returns:
        True if the stripped text ends with ``.``, ``!`` or ``?``; False
        otherwise, including for empty or whitespace-only text.
    """
    # str.endswith is safe on an empty string, whereas indexing [-1] would
    # raise IndexError when the model returns nothing but whitespace.
    return response.strip().endswith((".", "!", "?"))
# Retry mechanism for incomplete responses
def retry_response(messages):
    """Request one more completion and make sure the returned text looks finished.

    Sends *messages* to ``gpt-3.5-turbo`` a single time. If the reply still
    fails ``is_response_complete``, a fixed closing sentence is appended
    instead of issuing further requests.

    Args:
        messages: Chat messages in the format expected by
            ``openai.ChatCompletion.create``.

    Returns:
        The reply text, with the closing sentence appended when needed.
    """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    text = completion.choices[0].message['content']
    if is_response_complete(text):
        return text
    return text + " This is the end of the response. Please let me know if you need further clarification."
def base_model_chatbot(messages):
    """Answer a conversation with the plain chat model (no document retrieval).

    A fixed system instruction is placed in front of *messages* before the
    request is sent to ``gpt-3.5-turbo``. When the reply does not pass
    ``is_response_complete``, the same conversation (system message included)
    is handed to ``retry_response`` and that result is returned instead.

    Args:
        messages: List of chat message dicts; the list itself is not modified.

    Returns:
        The assistant's reply text.
    """
    system_prompt = {"role": "system", "content": "You are a helpful AI chatbot that provides clear, complete, and coherent responses to User's questions. Ensure your answers are in full sentences and complete the thought or idea."}
    conversation = [system_prompt] + messages

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    reply = completion.choices[0].message['content']

    # Fall back to a second request only when the first reply looks cut off.
    if is_response_complete(reply):
        return reply
    return retry_response(conversation)
class VectorDB:
    """Load the PDFs in a directory and index them in a Chroma vector store."""

    def __init__(self, docs_directory: str):
        # Directory that is searched (non-recursively) for ``*.pdf`` files.
        self.docs_directory = docs_directory

    def create_vector_db(self):
        """Build and return a Chroma store from every PDF in the directory.

        Each PDF is loaded with ``PyPDFLoader``, the resulting documents are
        split into chunks of 1000 characters with a 100-character overlap, and
        the chunks are embedded with ``OpenAIEmbeddings``.
        """
        pdf_paths = glob(os.path.join(self.docs_directory, "*.pdf"))
        loaders = [PyPDFLoader(path) for path in pdf_paths]
        documents = [doc for loader in loaders for doc in loader.load()]

        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        pieces = splitter.split_documents(documents)

        embeddings = OpenAIEmbeddings()
        return Chroma.from_documents(pieces, embeddings)
class ConversationalRetrievalChain:
    """Configure and build the document question-answering chain.

    NOTE: despite its name, this class builds a ``RetrievalQA`` chain; it is a
    local class, not an import from LangChain.
    """

    # Instruction placed in the system message ahead of the retrieved context.
    # It contains no braces, so it is safe to embed in a prompt template.
    SYSTEM_PROMPT = (
        "You are a knowledgeable AI that answers questions based on provided documents. "
        "Always give responses in clear, complete sentences."
    )

    def __init__(self, model_name="gpt-3.5-turbo", temperature=0, docs_directory="docs/"):
        """Store the chain configuration.

        Args:
            model_name: Chat model name passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.
            docs_directory: Directory whose PDFs are indexed for retrieval.
                Defaults to ``"docs/"``, the location previously hard-coded.
        """
        self.model_name = model_name
        self.temperature = temperature
        self.docs_directory = docs_directory

    def create_chain(self):
        """Build a ``RetrievalQA`` chain over the PDFs in ``docs_directory``.

        The vector database is created from scratch on every call, and the
        retriever returns the 2 most similar chunks per query.

        Returns:
            The chain produced by ``RetrievalQA.from_chain_type``.
        """
        # Local import keeps this block self-contained; langchain is already a
        # dependency of this file.
        from langchain.prompts import ChatPromptTemplate

        # ``system_prompt`` is not a ChatOpenAI parameter, so passing it there
        # does not set a system message. The instruction is delivered through
        # the QA prompt below instead.
        model = ChatOpenAI(
            model_name=self.model_name,
            temperature=self.temperature,
        )
        # The "stuff" chain fills {context} with the retrieved chunks and
        # {question} with the user's query.
        prompt = ChatPromptTemplate.from_messages([
            (
                "system",
                self.SYSTEM_PROMPT
                + "\n\nUse the following pieces of context to answer the user's question. "
                "If you don't know the answer, just say that you don't know.\n"
                "----------------\n"
                "{context}",
            ),
            ("human", "{question}"),
        ])
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        vector_db = VectorDB(self.docs_directory)
        retriever = vector_db.create_vector_db().as_retriever(
            search_type="similarity",
            search_kwargs={"k": 2},
        )
        return RetrievalQA.from_chain_type(
            llm=model,
            retriever=retriever,
            memory=memory,
            chain_type_kwargs={"prompt": prompt},
        )
def with_pdf_chatbot(messages):
    """Answer the latest user message using the PDF-backed retrieval chain.

    Only the content of the last entry in *messages* is used as the query.
    A new chain (and therefore a new vector database) is built on every call.
    If the answer does not pass ``is_response_complete``, a fixed closing
    sentence is appended.

    Args:
        messages: Non-empty list of chat message dicts with a ``'content'`` key.

    Returns:
        The answer text from the chain's ``'result'`` field.
    """
    question = messages[-1]['content'].strip()
    chain = ConversationalRetrievalChain().create_chain()
    answer = chain({"query": question})['result']
    if is_response_complete(answer):
        return answer
    return answer + " This is the end of the response. Let me know if you need further clarification."